Is there a better of parsing the below string instead of doing a strtok() to get each field.
"subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000"
Basically I want to retrieve the value for each field into another char buf's.
Here is my code. Just wanted to know if there is any other better way of doing it (any better string parsing algos)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#define SUBJECT "subject="
#define CC_LIST "cc="
#define SERVER "server="
static void
get_value (const char *tok, char **rval_buf, size_t field_len)
{
size_t val_size = 0;
if (!tok || !rval_buf)
return;
val_size = strlen(tok + field_len) + 1;
*rval_buf = calloc(1, val_size);
if (*rval_buf) {
strlcpy(*rval_buf, tok + field_len, val_size);
}
}
int
main (int argc, char **argv)
{
/* hard coded buf for testing */
char buf[] = "subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000";
char *subject_text = NULL;
char *cc_list = NULL;
char *server_addr = NULL;
char *tok = NULL;
int field_len = 0;
int val_len = 0;
tok = strtok(buf, "&");
while(tok) {
/*
* Handle the token
*/
/* check if it is subject */
if (strstr(tok, SUBJECT)) {
get_value(tok, &subject_text, strlen(SUBJECT));
} else if (strstr(tok, CC_LIST)) { /* check if it is CC */
get_value(tok, &cc_list, strlen(CC_LIST));
} else if (strstr(tok, SERVER)) { /* check if it is server */
get_value(tok, &server_addr, strlen(SERVER));
}
tok = strtok(NULL, "&");
}
/* dump data */
fprintf(stdout, "\nSUBJECT: \"%s\"\nCC_LIST: \"%s\"\nSERVER: \"%s\" \n\n",
subject_text, cc_list, server_addr);
return EXIT_SUCCESS;
}
strstr searches for one string ("the needle") inside another ("the haystack"), but you really only want to know whether the needle is the beginning of the haystack.
Here's a small suggestion: (requires #include <stdbool> or change the booleans to ints. I like bools.)
static bool
getval(const char* haystack, const char** res, const char* needle, size_t len) {
if (haystack && 0 == strncmp(haystack, needle, len)) {
*res = strdup(haystack + len);
return true;
}
return false;
}
and later:
for (tok = strtok(buf, "&"); tok; tok = strtok(NULL, "&")) {
getval(tok, &subject_text, SUBJECT, strlen(SUBJECT)) ||
getval(tok, &cc_list, CC_LIST, strlen(CC_LIST)) ||
getval(tok, &server_addr, SERVER, strlen(SERVER));
}
You can actually get away with doing the strlen inside of getval, which cuts down a lot on the noise, because most modern compilers are clever enough to inline getval and constant-fold the length of a constant string.
Use strtok()
char *strtok(char *str, const char *delim)
You can put '&' as a delimeter
I wrote a quick-n-dirty splitter for you:
int split(char* input, char delim, char*** parts)
{
int count = 1;
char** result;
char* t = input;
while(*t != '\0')
{
if (*t++ == delim)
{
count++;
}
}
result = (char**)malloc(count * sizeof(char*));
t = input;
int i = 0;
result[i] = input;
while(*t != '\0')
{
if (*t == delim)
{
*t = '\0';
result[++i] = ++t;
}
else
{
t++;
}
}
*parts = result;
return count;
}
int main()
{
char raw[] = "subject=\"some text\"&cc=abcd&server=acd.com";
char* str = _strdup(raw);
char** parts;
char** keyval;
int cnt = split(str, '&', &parts);
for(int i=0; i<cnt; ++i)
{
split(parts[i], '=', &keyval);
printf("[%d]: %s <--> %s\n", i, keyval[0], keyval[1]);
free(keyval);
}
free(parts);
getchar();
return 0;
}
Output
[0]: subject <--> "some text"
[1]: cc <--> abcd
[2]: server <--> acd.com
Related
I am trying to enhance the string splitter by splits on : char. Original version can be found at string splitter - how is it working
I do not want to use MAX_TOKEN_SIZE, I want the buffer to be just enough to hold each token. I added malloc and realloc as follows but I am getting free(): double free detected in tcache 2 error which I do not understand. How am I double freeing ? Thanks for all your help.
PS: Based on Gerhardh's comments, I modified the code as follows, but now I am getting segfault.
PS: Based on user3121023's comments, I added parenthesis around *token in 2 places and it works now.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
# define GROWBY 32
const char* splitter(const char *str, char delimiter, char **token) {
size_t i = 0;
size_t buflen = 32;
while (*str) {
if ( i == buflen) {
buflen += GROWBY;
printf("gowing buffer\n");
char *new_token = realloc(*token, buflen * sizeof **token);
if (new_token == NULL){
fprintf(stderr, "Out of Memory");
abort();
}
*token = new_token; //set the new pointer to old pointer
}
char c = *(str++);
if (c == delimiter)
break;
(*token)[i++] = c; //user3121023
}
(*token)[i] = '\0'; /* set the null terminator, user3121023 */
return str;
}
int main(){
const char *env =
"/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";
while (*env){
char *token = malloc(GROWBY * sizeof(char));
env = splitter(env, ':', &token);
if (token[0] == '\0') {
strcpy(token, "./");
}
printf("%s\n", token) ;
free(token);
}
return 0;
}
Try using strcspn to advance to the next delimiter.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
const char* splitter(const char *str, char *delimiter, char **token) {
size_t buflen = 0;
size_t extra = 0;
buflen = strcspn ( str, delimiter); // characters to next delimiter
extra = 1;
if ( ! buflen) {
extra = 3; // need space for "./" copy in main
}
char *new_token = realloc(*token, ( buflen + extra) * sizeof **token);
if (new_token == NULL){
fprintf(stderr, "Out of Memory");
abort();
}
*token = new_token; //set the new pointer to old pointer
strncpy ( *token, str, buflen);
(*token)[buflen] = 0;
str += buflen;
if ( *str) {
str += 1;
}
return str;
}
int main(void){
const char *env =
"/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";
while (*env){
char *token = NULL;
env = splitter(env, ":", &token); // note " instead of '
if (token[0] == '\0') {
strcpy(token, "./");
}
printf("%s\n", token) ;
free(token);
}
return 0;
}
Suppose my string is: haha "lol"
I want to extract only lol
#include<stdio.h>
int main() {
char a[20]={0};
char *s="haha \"lol\"";
if(sscanf(s,"%*[^\"]'%[^\"]\"",a)==1){
printf("Found stuff inside quotes");
}
}
By applying a proper parser for the source language that you are parsing.
One-liners for parsing input are often fragile and hard get right.
That said, you can try with something like
const char *input = "haha \"lol\"";
char quoted[32];
if(sscanf(input, "%*[^\"]\"%31[^\"]\"", quoted) == 1)
{
printf("got '%s'\n", quoted);
}
This should be hardened but is enough to get you started.
Sometimes a little code goes a long way. All that is need is 2 calls to strchr()
extract_quoted_string() changed to pseudo-code.
const char *extract_quoted_string(const char *s, size_t *sz) {
const char *left = look_for_quote_start_at_s;
if (failure?) {
return NULL;
}
Update_left_to_the_next_possible_position
const char *right = look_for_quote_start_at_updated_left;
if (failure?) {
return NULL;
}
Compute_and_save_size_based_on_left_and_right
return left;
}
Test harness
void test(const char *s) {
printf("<%s> --> ", s);
size_t sz;
const char *extract = extract_quoted_string(s, &sz);
if (extract) {
printf("<%.*s>\n", (int) sz, extract);
} else {
printf("None\n");
}
}
int main() {
test("");
test("123");
test("\"123");
test("123\"");
test("\"123\"");
test("abc\"123");
test("abc\"123\"");
test("123\"xyz");
test("\"123\"xyz");
test("abc\"123\"xyz");
}
Expected output
<> --> None
<123> --> None
<"123> --> None
<123"> --> None
<"123"> --> <123>
<abc"123> --> None
<abc"123"> --> <123>
<123"xyz> --> None
<"123"xyz> --> <123>
<abc"123"xyz> --> <123>
I think that it is enough in the answer by unwind (even if you add code according to requirements)
A question similar to this question already exists.
Split unquoted string in C
When using that approach you can write follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ESCAPE '\\' //ESCAPE CHARACTER
typedef struct token {
const char *top;
const char *end;//point to next character
} Token;
Token getToken(const char **sp, char sep){
const char *s = *sp;
const char *top, *end;
Token token = { NULL, NULL};
while(*s && *s == sep)//skip top separators
++s;
if(!*s){
*sp = s;
return token;
}
token.top = s;
while(*s && *s != sep){
if(*s == ESCAPE)
++s;
else if(*s == '"'){
char *p = strchr(s + 1, '"');//search end '"'
while(p && p[-1] == ESCAPE)
p = strchr(p + 1, '"');
if(p)
s = p;
}
++s;
}
token.end = s;
*sp = s;
return token;
}
char *remove_escape(char *s){
char *from, *to;
from = to = s;
while(*from){
if(*from != ESCAPE)
*to++ = *from;
++from;
}
*to = 0;
return s;
}
char *ft_strsub(Token token){
size_t len = token.end - token.top;
char *sub = malloc(len + 1);//check return value
if (sub){
memcpy(sub, token.top, len);
sub[len] = 0;
}
return sub;
}
int main(int argc, char **argv){
char *str = NULL;
const char *s="haha \"lol\"";
Token token = getToken(&s, ' ');
while(token.top != NULL){
str = ft_strsub(token);
remove_escape(str);
if(*str == '"')//find it!
break;
free(str);
token = getToken(&s, ' ');
}
if(str){
printf("Found stuff inside quotes: ");
//remove "
size_t len = strlen(str);
str[len-1] = 0;
printf("'%s'\n", str + 1);//ignore first character or use memmove
free(str);
}
return 0;
}
In the code below, in "parse" function I am trying to get substring from the string "line". I am successfully printing the "method" variable, but "requesttarget" and "httpversion" variables are empty for some reason.
(ps all these printf's are also inside my parse function)
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
//prototypes
bool parse(const char* line, char* abs_path, char* query);
int strindex(char** pos, const char* str);
void substr(int start, int end, char* holder, const char* line);
int main(void)
{
const char* line = "GET /hello.php?name=Alice HTTP/1.1";
char* abs_path = NULL;
char* query = NULL;
if(parse(line, abs_path, query))
{
printf("It works!\n");
}
}
bool parse(const char* line, char* abs_path, char* query)
{
char* space;
int firstspace;
int secondspace;
char* method = malloc(50 * sizeof(char));
char* requesttarget = malloc(50 * sizeof(char));
char* httpversion = malloc(50 * sizeof(char));
space = strchr(line, ' ');
printf("%p\n", space);
//checks if strchr returns
if(space == NULL)
{
return false;
}
//index in INT of the character
firstspace = strindex(&space, line);
printf("%i\n", firstspace);
//stores the method
substr(0, firstspace, method, line);
space = strrchr(line, ' ');
printf("%p\n", space);
//index in INT of the character
secondspace = strindex(&space, line);
printf("%i\n", secondspace);
//checks if strchr returns
if(space == NULL)
{
return false;
}
//firstspace should come before secondspace
if(firstspace > secondspace)
{
return false;
}
//stores request - target
substr(firstspace + 1, secondspace, requesttarget, line);
//stores http-version
substr(secondspace + 1, strlen(line), httpversion, line);
printf("method: %s\n", method);
printf("requesttarget: %s\n", requesttarget);
printf("httpversion: %s\n", httpversion);
return true;
}
int strindex(char** pos, const char* str)
{
for(int i = 0, n = strlen(str); i < n; i++)
{
if((str + i) == *pos)
{
return i;
}
}
return -1;
}
void substr(int start, int end, char* holder, const char* line)
{
//char* holder = malloc(50 * sizeof(char));
int i = start;
for(; i < end; i++)
{
holder[i] = line[i];
}
holder[i] = '\0';
//return holder;
}
void substr(int start, int end, char* holder, const char* line)
{
//char* holder = malloc(50 * sizeof(char));
int i = start, j=0;
for(; i < end; i++)
{
holder[j++] = line[i];
}
holder[j] = '\0';
//return holder;
}
you were not storing data in holder from 2nd iteration properly.
from 2nd iteration start = 3 and end = 25. While storing in holder your index starts from 3, which is correct for line but not for holder.
Add one more variable to start the index for holder from 0.
A smaller version: (untested)
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
const char* line = "GET /hello.php?name=Alice HTTP/1.1";
char method[32], request[1024], version[32], *src, *dest, *end;
for(src=line, end=(dest=method)+sizeof(method)-1 ; *src!='\0' && *src!=' ' && dest<end; src++, dest++) *dest=*src;
*dest='\0';
while(*src==' ') src++;
for(end=(dest=request)+sizeof(request)-1 ; *src!='\0' && *src!=' ' && dest<end; src++, dest++) *dest=*src;
*dest='\0';
while(*src==' ') src++;
for(end=(dest=version)+sizeof(version)-1 ; *src!='\0' && *src!=' ' && dest<end; src++, dest++) *dest=*src;
*dest='\0';
printf("method: %s\n", method);
printf("requesttarget: %s\n", request);
printf("httpversion: %s\n", version);
}
i use strtok_r like:
char *the_sting = "a|b||e|f";
char *last;
char *current;
current = (char*)strtok_r(the_sting, "|", &last);
while(current != NULL)
{
printf(current);
printf("\n");
current = (char*)strtok_r(NULL, "|", &last);
}
i get:
>>a
>>b
>>e
>>f
the problem is, that i need 'blank' when there is nothing between the delimiters.
like:
>>a
>>b
>>
>>e
>>f
Compare the current current with the previous current. If the difference is more than strlen(previous_current) + 1 then one or more empty places was skipped.
Then strtok_r is not your function, furthermore, you can't use a string literal char *the_sting = "a|b||e|f"; because strtok_r modifies such string, use an array instead char the_sting[] = "a|b||e|f";
And finally, do not use printf in this way printf(current); (is dangerous), instead:
printf("%s", current);
This little function does what you want:
#include <stdio.h>
#include <string.h>
char *scan(char **pp, char c)
{
char *s, *p;
p = strchr(*pp, c);
if (p) *p++ = '\0';
s = *pp;
*pp = p;
return s;
}
int main(void)
{
char the_sting[] = "a|b||e|f"; /* I think you mean the_string here */
char *s, *p = the_sting;
while (p) {
s = scan(&p, '|');
printf("<%s>", s);
}
return 0;
}
Note that a simple char (not a string) is used as delimiter
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *my_strtok_r(char *str, const char *delims, char **store){
char *p, *wk;
if(str != NULL){
*store = str;
}
if(*store == NULL) return NULL;
//*store += strspn(*store, delims);//skip delimiter
if(**store == '\0') return NULL;
p=strpbrk(wk=*store, delims);
if(p != NULL){
*p='\0';
*store = p + 1;
} else {
*store = NULL;
}
return wk;
}
int main(void){
char the_sting[] = "a|b||e|f";
char *last;
char *current;
current = my_strtok_r(the_sting, "|", &last);
while(current != NULL)
{
printf(current);
printf("\n");
current = my_strtok_r(NULL, "|", &last);
}
return 0;
}
It's easy to roll your own, really:
#include <string.h>
typedef struct {
const unsigned char *data;
size_t len;
} buffer_t;
/* Use strpbrk() for multiple delimiters. */
buffer_t
memtok(const void *s, size_t length, const char *delim, buffer_t *save_ptr)
{
const unsigned char *stream,
*token;
size_t len = 0;
if (NULL == s) {
stream = save_ptr->data;
} else {
stream = s;
save_ptr->len = length;
}
token = stream;
/* Advance until either a token is found or the stream exhausted. */
while (save_ptr->len--) {
if (memchr(delim, *stream, strlen(delim))) {
/* Point save_ptr past the (non-existent) token. */
save_ptr->data = stream + 1;
return (buffer_t) { .data = token, .len = len };
}
++len;
++stream;
}
/* State : done. */
*save_ptr = (buffer_t) { .data = NULL, .len = 0 };
/* Stream exhausted but no delimiters terminate it. */
return (buffer_t){ .data = token, .len = len };
}
and for a short test:
int main(int argc, char **argv)
{
const char *the_sting = "a|b||e|f";
buffer_t kek = { .data = the_sting, .len = 8 },
token, state;
token = memtok(the_sting, 8, "|", &state);
while (token.data != NULL) {
char test[512];
memcpy(test, token.data, token.len);
test[token.len] = 0;
printf("%s\n", test);
token = memtok(NULL, 0, "|", &state);
}
return 0;
}
how about this:
char s[] = "1,2,,,,,,,3,4,5,6";
char *tok, *saved;
tok = strtok_r(s, ",", &saved);
do
{
fprintf(stderr, "tok = %s, saved = %s\n", tok, saved);;
if (',' == *saved)
{
while (',' == *saved++ )
{
fprintf(stderr, "saved = %s\n", saved);;
}
*saved--;
}
} while( (tok = (strtok_r(((void *)0), ",", &saved))));
I need to use strtok to read in a first and last name and seperate it. How can I store the names where I can use them idependently in two seperate char arrays?
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="test string.";
char * test;
test = strtok (str," ");
while (test != NULL)
{
printf ("%s\n",test);
test= strtok (NULL, " ");
}
return 0;
}
Here is my take at a reasonably simple tokenize helper that
stores results in a dynamically growing array
null-terminating the array
keeps the input string safe (strtok modifies the input string, which is undefined behaviour on a literal char[], at least I think in C99)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** tokenize(const char* input)
{
char* str = strdup(input);
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
char* tok=strtok(str," ");
while(1)
{
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = tok? strdup(tok) : tok;
if (!tok) break;
tok=strtok(NULL," ");
}
free(str);
return result;
}
int main ()
{
char** tokens = tokenize("test string.");
char** it;
for(it=tokens; it && *it; ++it)
{
printf("%s\n", *it);
free(*it);
}
free(tokens);
return 0;
}
Here is a strtok-free reimplementation of that (uses strpbrk instead):
char** tokenize(const char* str)
{
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
const char* e=str;
if (e) do
{
const char* s=e;
e=strpbrk(s," ");
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = e? strndup(s, e-s) : strdup(s);
} while (e && *(++e));
if (count >= capacity)
result = realloc(result, (capacity+=1)*sizeof(*result));
result[count++] = 0;
return result;
}
Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.
That is we transform this:
char str[] ="test string.";
Into this:
char str[] ="test\0string.";
^ ^
| |
char *s1 ----- |
char *s2 -----------
.
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="test string.";
char *firstname = strtok(str, " ");
char *lastname = strtok(NULL, " ");
if (!lastname)
lastname = "";
printf("%s, %s\n", lastname, firstname);
return 0;
}
What about using strcpy:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_NAMES 2
int main ()
{
char str[] ="test string.";
char *names[MAX_NAMES] = { 0 };
char *test;
int i = 0;
test = strtok (str," ");
while (test != NULL && i < MAX_NAMES)
{
names[i] = malloc(strlen(test)+1);
strcpy(names[i++], test);
test = strtok (NULL, " ");
}
for(i=0; i<MAX_NAMES; ++i)
{
if(names[i])
{
puts(names[i]);
free(names[i]);
names[i] = 0;
}
}
return 0;
}
It contains much clutter to maintain a complete program and clean its resources, but the main point is to use strcpy to copy each token into its own string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** split(const char *str, const char *delimiter, size_t *len){
char *text, *p, *first, **array;
int c;
char** ret;
*len = 0;
text=strdup(str);
if(text==NULL) return NULL;
for(c=0,p=text;NULL!=(p=strtok(p, delimiter));p=NULL, c++)//count item
if(c==0) first=p; //first token top
ret=(char**)malloc(sizeof(char*)*c+1);//+1 for NULL
if(ret==NULL){
free(text);
return NULL;
}
strcpy(text, str+(first-text));//skip until top token
array=ret;
for(p=text;NULL!=(p=strtok(p, delimiter));p=NULL){
*array++=p;
}
*array=NULL;
*len=c;
return ret;
}
void free4split(char** sa){
char **array=sa;
if(sa!=NULL){
free(array[0]);//for text
free(sa); //for array
}
}
int main(void){
char str[] ="test string.";
char **words;
size_t len=0;
int i;
words = split(str, " \t\r\n,.", &len);
/*
for(char **wk = words; *wk ;wk++){
printf("%s\n", *wk);
}
*/
for(i = 0;i<len;++i){
printf("%s\n", words[i]);
}
free4split(words);
return 0;
}
/* result:
test
string
*/
Copy the results from strtok to a new buffer using a function such as
/*
* Returns a copy of s in freshly allocated memory.
* Exits the process if memory allocation fails.
*/
char *xstrdup(char const *s)
{
char *p = malloc(strlen(s) + 1);
if (p == NULL) {
perror("memory allocation failed");
exit(1);
}
strcpy(p, s);
return p;
}
Don't forget to free the return values when you're done with them.
IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:
#include <string.h>
#include <stdlib.h>
static char *make_str(char const *begin, char const *end) {
size_t len = end-begin;
char *ret = malloc(len+1);
if (ret != NULL) {
memcpy(ret, begin, len);
ret[len]='\0';
}
return ret;
}
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) {
int i;
char const *start=input, *end=start;
for (i=0; *start && i<max; i++) {
for ( ;NULL!=strchr(delims, *start); ++start)
;
for (end=start; *end && NULL==strchr(delims, *end); ++end)
;
tokens[i] = make_str(start, end);
start = end+1;
}
return i;
}
#ifdef TEST
#define MAX_TOKENS 10
int main() {
char *tokens[MAX_TOKENS];
int i;
size_t num = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");
for (i=0; i<num; i++) {
printf("|%s|\n", tokens[i]);
free(tokens[i]);
}
return 0;
}
#endif
U can do something like this too.
int main ()
{
char str[] ="test string.";
char * temp1;
char * temp2;
temp1 = strtok (str," ");
temp2 = strchr(str, ' ');
if (temp2 != NULL)
temp2++;
printf ("Splitted string :%s, %s\n" , temp1 , temp2);
return
}