I have a string:
char *s = "asdf:jhgf";
I need to split this into two tokens:
token[0] = "asdf";
token[1] = "jhgf";
I'm having problems with strtok().
You can use a simple sscanf():
/* Split "left:right" into two tokens.
 * The field widths (79) leave room for the terminating NUL in each 80-byte
 * buffer, so an over-long input is truncated instead of overflowing. */
char token[2][80];
if (sscanf(s, "%79[^:]:%79s", token[0], token[1]) == 2)
{
    printf("token 0='%s'\ntoken 1='%s'\n", token[0], token[1]);
}
Note that the first conversion is done using %[^:] to scan up until (but not including) the colon. Then we skip the colon, and scan an ordinary string.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Splits "asdf:jhgf" at the first ':' into two heap-allocated tokens and
 * prints each token on its own line.
 * Returns 0 on success, EXIT_FAILURE if the separator is missing or an
 * allocation fails. */
int main(void)
{
    const char *s = "asdf:jhgf";   /* string literal: never written through */
    char *token[2] = { NULL, NULL };
    int status = EXIT_FAILURE;

    const char *p = strchr(s, ':');
    if (p == NULL) {
        fputs("no ':' separator found\n", stderr);
        return status;
    }

    size_t len1 = (size_t)(p - s);  /* characters before the ':' */
    size_t len2 = strlen(p + 1);    /* characters after the ':' */

    token[0] = malloc(len1 + 1);
    token[1] = malloc(len2 + 1);
    if (token[0] != NULL && token[1] != NULL) {
        memcpy(token[0], s, len1);
        token[0][len1] = '\0';
        memcpy(token[1], p + 1, len2 + 1);  /* +1 copies the terminating NUL too */
        puts(token[0]);
        puts(token[1]);
        status = 0;
    }

    /* free(NULL) is a no-op, so this is safe on the failure path as well */
    free(token[0]);
    free(token[1]);
    return status;
}
Related
I would like to extract only /home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin from the string in the code below. What should I do?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Question code: attempts to cut one record out of raw terminal output using
 * two strtok passes. The reported output is only "[?2004l". */
int main(){
char str[1024]="\r\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";
/* Scanning starts at str + 1 (past the leading CR); strtok overwrites the
 * next CR with a NUL byte and returns the token in front of it. */
char *ptr = strtok(str + 1, "\r");
/* NOTE(review): the source (ptr + 1) and the destination (str) are inside the
 * same array and overlap; strcpy's parameters are restrict-qualified, so this
 * call has undefined behavior. */
strcpy(str, ptr + 1);
/* Second pass from the start of the buffer, splitting on CR, LF or ESC. */
ptr = strtok(str, "\r\n\033");
printf("%s\n",ptr);
return 0;
}
The execution result of the code I made is as follows.
root#audit:~# ./test
output : [?2004l
strtok replaces the first delimiter found with the null-terminating byte, and returns the token that precedes it.
Additionally, strtok does not handle empty tokens, treating a series of adjacent delimiters as a single delimiter.
The following
/* first CR removed, as is the effect of the first `strtok` and `strcpy` in the example */
char str[] = "\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";
/* leading LF and ESC are skipped as delimiters; the token ends at the next CR */
char *token = strtok(str, "\r\n\033");
results in a null-terminating byte being placed in str and token being equal to str + 2
/* start of token --v v-- NUL byte */
char str[] = "\n\033[?2004l\0/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";
Here are a couple of naive examples that work with the exact string in question.
Using strchr and strstr:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Prints the span that starts at the first '/' and ends just before the
 * first CR LF pair that follows it, with the embedded CR shown as '_'.
 * Returns 0 on success, EXIT_FAILURE if either marker is missing. */
int main(void)
{
    char str[] = "\r\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";

    char *base = strchr(str, '/');
    if (base == NULL) {
        return EXIT_FAILURE;
    }

    char *tail = strstr(base, "\r\n");
    if (tail == NULL) {
        return EXIT_FAILURE;
    }

    /* for printing purposes remove the CR */
    char *cr = strchr(base, '\r');
    if (cr != NULL) {
        *cr = '_';
    }

    /* the '*' precision must be an int; a pointer difference is ptrdiff_t */
    printf("%.*s\n", (int) (tail - base), base);
    return 0;
}
Using strchr and strcspn:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Prints the span that starts at the first '/' and runs up to the next LF,
 * without the CR that precedes that LF, and with the embedded CR shown as '_'.
 * Returns 0 on success, EXIT_FAILURE if no '/' is present. */
int main(void)
{
    char str[] = "\r\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";

    char *base = strchr(str, '/');
    if (base == NULL) {
        return EXIT_FAILURE;
    }

    size_t length = strcspn(base, "\n");

    /* drop the trailing CR, but only if there is one: an unconditional
     * `length - 1` would wrap around to SIZE_MAX when length is 0 */
    if (length > 0 && base[length - 1] == '\r') {
        length--;
    }

    /* for printing purposes remove the CR that is inside the span */
    char *cr = memchr(base, '\r', length);
    if (cr != NULL) {
        *cr = '_';
    }

    printf("%.*s\n", (int) length, base);
    return 0;
}
Both of these find the span of characters ('\r' replaced with '_' for display purposes):
/home/user01_uuidd:x:108:114::/run/uuidd:/usr/sbin/nologin
Note that strchr and strstr can return NULL, and passing NULL to any of these functions results in Undefined Behaviour. strcspn can obviously return 0, in which case subtracting one from this value would result in SIZE_MAX. These situations should all be considered and guarded against in a real program.
Undefined behavior
strcpy(str, ptr + 1); is undefined behavior (UB), as it attempts to copy a string where the source and destination overlap. Don't do that.
char *strcpy(char * restrict s1, const char * restrict s2);
The restrict in strcpy() implies the caller should not pass in pointers to overlapping areas.
Research strspn() and strcspn() as a replacement for strtok(). Used as pair, they work like strtok() without changing the source string.
Something like below. Notice that the str[1024] is even const.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Walks the constant input with a single cursor: strcspn measures a token,
 * strspn skips the delimiters after it. The input is never modified.
 * Prints three tokens, one per line. */
int main(void)
{
    const char str[1024] =
        "\r\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";
    static const char cr_only[] = "\r";
    static const char cr_lf_esc[] = "\r\n\033";

    /* token 1: from str + 1 up to the next CR */
    const char *cursor = str + 1;
    size_t span = strcspn(cursor, cr_only);
    printf("%.*s\n", (int) span, cursor);

    /* token 2: skip the CR run, then read up to the next CR */
    cursor += span;
    cursor += strspn(cursor, cr_only);
    span = strcspn(cursor, cr_only);
    printf("%.*s\n", (int) span, cursor);

    /* token 3: skip the CR run, then read up to the next CR, LF or ESC */
    cursor += span;
    cursor += strspn(cursor, cr_only);
    span = strcspn(cursor, cr_lf_esc);
    printf("%.*s\n", (int) span, cursor);

    return 0;
}
Output
[?2004l
/home/user01
uuidd:x:108:114::/run/uuidd:/usr/sbin/nologin
Or perhaps with a loop?
/* Prints every token of the constant input on its own line, where tokens are
 * separated by runs of CR, LF or ESC. The input is never modified.
 * Works whether or not the input starts or ends with delimiters. */
int main(void)
{
    const char str[1024] =
        "\r\n\033[?2004l\r/home/user01\ruuidd:x:108:114::/run/uuidd:/usr/sbin/nologin\r\n\033[?2004h\033]0;user01#audit: ~\auser01#audit:~$ ";
    const char *token = "\r\n\033";
    const char *ptr = str;

    for (;;) {
        /* skip the delimiter run (possibly empty) in front of the next token */
        ptr += strspn(ptr, token);
        if (*ptr == '\0') {
            break;              /* nothing but delimiters was left */
        }

        size_t length = strcspn(ptr, token);
        printf("%.*s\n", (int) length, ptr);
        ptr += length;
    }
    return 0;
}
Output
[?2004l
/home/user01
uuidd:x:108:114::/run/uuidd:/usr/sbin/nologin
[?2004h
]0;user01#audit: ~user01#audit:~$
I have the following string abcd1234 and I want to find a way to break this string into two different strings, abcd and 1234. I have tried the following code:
/* Question code: tries to split a line such as "abcd1234" into its letter
 * part and its number part with two strtok calls. */
char buf[100],*str1,*str2;
int x;
fgets(buf,sizeof(buf),stdin);
/* Digits are part of the delimiter set here, so the delimiter that ends the
 * first token (the first digit) is overwritten with NUL and is not part of
 * any later token. */
str1=strtok(buf,"0123456789 \t\n");
/* The trailing \0 in this delimiter literal has no effect: the delimiter
 * string already ends at its first NUL byte. */
str2=strtok(NULL," \n\t\0");
/* NOTE(review): strtok returns NULL when no token is left; str1/str2 are
 * used below without a check. */
puts(str1);
puts(str2);
x=atoi(str2);
printf("x=%d", x);
but output is abcd 234. And if I try it with one letter and one number, e.g a2 I take only e on output and x is 0.
As per the man page of strtok()
Each call to strtok() returns a pointer to a null-terminated string containing the next token. This string does not include the delimiting byte. [...]
So, while using "0123456789 \t\n" as the delimiter for the first time, 1 will be treated as the actual delimiter and will not be considered in the subsequent parsing.
You may want to use strcspn() and/or strpbrk() to find out the index for the required sub-strings and parse accordingly.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
size_t extract(const char **sp, char *out, int (*test)(int ch));
/* Reads one line from stdin and walks it left to right: every run of letters
 * is printed on its own line, every run of digits is printed together with
 * its integer value, and any other character is skipped.
 * Returns 0 on success, EXIT_FAILURE if no line could be read. */
int main(void)
{
    char buf[100], str1[100], str2[100];
    const char *p = buf;

    /* on EOF or error buf holds no valid string, so do not scan it */
    if (fgets(buf, sizeof(buf), stdin) == NULL) {
        return EXIT_FAILURE;
    }

    while (*p) {
        if (isalpha((unsigned char)*p)) {
            /* a run can never be longer than buf, so str1 is large enough */
            extract(&p, str1, isalpha);
            puts(str1);
        } else if (isdigit((unsigned char)*p)) {
            extract(&p, str2, isdigit);
            int x = atoi(str2);
            printf("%s, x=%d\n", str2, x);
        } else {
            ++p;    /* skip one char */
        }
    }
    return 0;
}
/* Copies the leading run of characters accepted by `test` from *sp into `out`
 * and NUL-terminates `out`.
 *
 * sp    in/out: points at the read position; advanced past the copied run.
 * out   destination buffer; the caller must provide room for the run plus NUL.
 * test  <ctype.h>-style predicate; called with the character as unsigned char.
 *
 * Returns the number of characters copied (0 if the first one is rejected). */
size_t extract(const char **sp, char *out, int (*test)(int ch))
{
    const char *begin = *sp;
    size_t count = 0;

    while (begin[count] != '\0' && test((unsigned char)begin[count])) {
        out[count] = begin[count];
        ++count;
    }
    out[count] = '\0';

    *sp = begin + count;
    return count;
}
Try below code.Hope this will help you.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Separates the characters of "abcd1234" into a digit string and a non-digit
 * string (each keeping the original order) and prints
 * "<non-digits> <digits>". */
int main(void)
{
    char string[] = "abcd1234";
    /* each output can hold the whole input plus NUL, so neither can overflow */
    char digitStr[sizeof string];
    char charStr[sizeof string];
    size_t digits = 0;
    size_t chars = 0;

    for (size_t i = 0; string[i] != '\0'; i++) {
        /* <ctype.h> functions need a value representable as unsigned char */
        if (isdigit((unsigned char)string[i])) {
            digitStr[digits++] = string[i];
        } else {
            charStr[chars++] = string[i];
        }
    }
    digitStr[digits] = '\0';
    charStr[chars] = '\0';

    printf("%s %s\n", charStr, digitStr);
    return 0;
}
I realize I'm very late on this one, but this is for if anyone has a similar case
Assuming all input strings are like your example, this method will work.
char buf[100];
fgets(buf, sizeof(buf), stdin);
if (buf[strlen(buf) - 1] == '\n')
buf[strlen(buf) - 1] = '\0';
int x = atoi(strpbrk(buf, "0123456789"));
char letters[number - buf + 1];
memcpy(letters, sizeof(letters) - 1, buf);
letters[sizeof(letters) - 1] = '\0';
//letters is the word
//x is the number as an int, not a string
• Note the if statement after the fgets. This checks that the newline character was read by fgets, and turns it into a NUL character. (essentially truncating the string).
• As for strpbrk(), that's just a function that returns a pointer to the first occurrence, inside the first string, of any character from the second string. I use it here to find the start of the digit sequence.
• I would also drop the atoi() for strtol() for safety.
• The letters[] array size is the return of strpbrk() (the address of the first number), minus the start of the array (giving the length of the letter string in bytes), plus one for the NUL character I add later.
I have a string that includes two names and a comma. How can I take them apart and write them to separate strings?
Example
char *line="John Smith,Jane Smith";
I am thinking of using sscanf function.
sscanf(line,"%s,%s",str1,str2);
What should i do?
note: I can change comma to space character.
I am thinking of using sscanf function.
Don't even think about it.
char line[] = "John Smith,Jane Smith";
char *comma = strchr(line, ',');
*comma = 0;
char *firstName = line;
char *secondName = comma + 1;
Here's how you could do it using strtok:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Returns a malloc'd copy of src, or NULL if src is NULL or allocation fails.
 * The caller owns the result and must free() it. */
static char *copy_string(const char *src)
{
    if (src == NULL) {
        return NULL;
    }
    size_t size = strlen(src) + 1;
    char *dst = malloc(size);
    if (dst != NULL) {
        memcpy(dst, src, size);
    }
    return dst;
}

/* Extracts the first and third comma-separated fields of the line and prints
 * them on two lines. Returns 0 on success, EXIT_FAILURE if a field is missing
 * or an allocation fails. */
int main(void)
{
    // Your string.
    const char *line = "John Smith,10,Jane Smith";
    int status = EXIT_FAILURE;
    char *first = NULL;
    char *second = NULL;

    // Let's work with a copy of your string: strtok writes NUL bytes into it.
    char *line_copy = copy_string(line);
    if (line_copy == NULL) {
        goto done;
    }

    // Get the first person.
    first = copy_string(strtok(line_copy, ","));

    // Skip the number.
    strtok(NULL, ",");

    // Get the second person.
    second = copy_string(strtok(NULL, ","));

    if (first == NULL || second == NULL) {
        goto done;
    }

    // Print.
    printf("%s\n%s", first, second);
    status = 0;

done:
    free(second);
    free(first);
    free(line_copy);
    return status;
}
How to safely parse a tab-delimited string? For example:
test\tbla-bla-bla\t2332 ?
strtok() is a standard function for parsing strings with arbitrary delimiters. It is, however, not thread-safe. Your C library of choice might have a thread-safe variant.
Another standard-compliant way (just wrote this up, it is not tested):
#include <string.h>
#include <stdio.h>
/* Prints each tab-separated field of the string on its own line without
 * modifying the string. Empty fields are printed as empty lines. */
int main(void)
{
    char string[] = "foo\tbar\tbaz";
    char *start = string;
    char *end;

    while ((end = strchr(start, '\t')) != NULL) {
        // "%.*s" prints at most N characters, where N is taken from an int
        // argument; end - start is a ptrdiff_t, so it must be cast
        // (your token is not zero-terminated!)
        printf("%.*s\n", (int) (end - start), start);
        start = end + 1;
    }

    // start points to last token, zero-terminated
    printf("%s", start);
    return 0;
}
Use strtok_r instead of strtok (if it is available). It has similar usage, except it is reentrant, and it does not modify the string like strtok does. [Edit: Actually, I misspoke. As Christoph points out, strtok_r does replace the delimiters by '\0'. So, you should operate on a copy of the string if you want to preserve the original string. But it is preferable to strtok because it is reentrant and thread safe]
strtok will leave your original string modified. It replaces the delimiter with '\0'. And if your string happens to be a constant stored in read-only memory (some compilers will do that), you may actually get an access violation.
Using strtok() from string.h.
#include <stdio.h>
#include <string.h>
/* Prints each tab-separated field of the string on its own line.
 * strtok modifies `str` and treats a run of tabs as a single separator. */
int main(void)
{
    char str[] = "test\tbla-bla-bla\t2332";

    /* only the tab is a delimiter, so a field may itself contain spaces */
    for (char *pch = strtok(str, "\t"); pch != NULL; pch = strtok(NULL, "\t")) {
        printf("%s\n", pch);
    }
    return 0;
}
You can use any regex library or even the GLib GScanner, see here and here for more information.
Yet another version; this one separates the logic into a new function
#include <stdio.h>
/* Advances [*start, *end) to the next tab-separated token.
 *
 * start  in/out: beginning of the current token; on the first call it must
 *        point at the string to scan.
 * end    in/out: one past the current token; must be NULL on the first call.
 *
 * Returns 1 with *start/*end updated, or 0 (leaving both untouched) once the
 * previous token ended at the terminating zero. */
static _Bool next_token(const char **start, const char **end)
{
    const char *cursor = *end;

    if (cursor == NULL) {
        /* first call: scan from the beginning */
        cursor = *start;
    } else {
        if (*cursor == '\0') {
            return 0;           /* previous token was the last one */
        }
        ++cursor;               /* step over the tab */
        *start = cursor;
    }

    /* advance to the terminating zero or the next tab */
    for (; *cursor != '\0' && *cursor != '\t'; ++cursor) {
    }

    *end = cursor;
    return 1;
}
/* Prints every tab-separated token of the string on its own line. */
int main(void)
{
    const char *string = "foo\tbar\tbaz";
    const char *start = string;
    const char *end = NULL; // NULL value indicates first call

    while (next_token(&start, &end)) {
        // print substring [start,end[
        // the '*' precision must be an int; end - start is a ptrdiff_t
        printf("%.*s\n", (int) (end - start), start);
    }

    return 0;
}
If you need a binary safe way to tokenize a given string:
#include <string.h>
#include <stdio.h>
/* Prints each `delim`-separated token of the buffer on its own line.
 *
 * str    buffer to scan; it may contain NUL bytes and need not be terminated.
 * delim  separator byte.
 * size   number of bytes in str.
 *
 * A buffer that ends with `delim` produces no trailing empty token, and an
 * empty buffer prints nothing. */
void tokenize(const char *str, const char delim, const size_t size)
{
    const char *start = str;
    const char *const end = str + size;

    while (start < end) {
        const char *next = memchr(start, delim, (size_t)(end - start));
        const char *stop = (next != NULL) ? next : end;

        /* fwrite writes the exact byte count, so embedded NUL bytes are kept
         * (a "%.*s" conversion would stop at the first NUL) */
        fwrite(start, 1, (size_t)(stop - start), stdout);
        putchar('\n');

        if (next == NULL) {
            break;      /* last token: do not step past the end of the buffer */
        }
        start = next + 1;
    }
}
/* Tokenizes a sample tab-delimited string, printing one field per line. */
int main(void)
{
    char str[] = "test\tbla-bla-bla\t2332";

    /* strlen returns size_t, the same type tokenize takes: no narrowing */
    size_t len = strlen(str);
    tokenize(str, '\t', len);
    return 0;
}
How can I strip a string with all \n and \t in C?
This works in my quick and dirty tests. Does it in place:
#include <stdio.h>
/* Removes every '\t' and '\n' from s in place; all other characters keep
 * their order. s must be a writable, NUL-terminated string. */
void strip(char *s)
{
    char *dst = s;      /* next write position; never ahead of the reader */

    for (const char *src = s; *src != '\0'; ++src) {
        if (*src == '\t' || *src == '\n') {
            continue;   /* dropped */
        }
        *dst++ = *src;
    }
    *dst = '\0';
}
/* Strips tabs and newlines from a sample string and prints the result. */
int main()
{
    char text[] = "this\t is\n a\t test\n test";

    strip(text);
    printf("%s\n", text);
    return 0;
}
And to appease Chris, here is a version which will place the result in a newly malloced buffer and return it (thus it'll work on literals). You will need to free the result.
/* Returns a newly malloc'd copy of s with every '\t' and '\n' removed, or
 * NULL if the allocation fails. The caller must free() the result. */
char *strip_copy(const char *s)
{
    /* the result can never be longer than the input */
    char *result = malloc(strlen(s) + 1);
    if (result == NULL) {
        return NULL;
    }

    size_t kept = 0;
    for (; *s != '\0'; ++s) {
        if (*s != '\t' && *s != '\n') {
            result[kept++] = *s;
        }
    }
    result[kept] = '\0';
    return result;
}
If you want to replace \n or \t with something else, you can use the function strstr(). It returns a pointer to the first place in a string where a given substring occurs, or NULL if it does not occur. For example:
// Find the first "\n".
char new_char = 't';
char* pFirstN = strstr(szMyString, "\n");
*pFirstN = new_char;
You can run that in a loop to find all \n's and \t's.
If you want to "strip" them, i.e. remove them from the string, you'll need to actually use the same method as above, but copy the contents of the string "back" every time you find a \n or \t, so that "this i\ns a test" becomes: "this is a test".
You can do that with memmove (not memcpy, since the src and dst are pointing to overlapping memory), like so:
char* temp;
// Remove \n.
while ((temp = strstr(str, "\n")) != NULL) {
    // Shift everything after the "\n" one byte to the left. The count is the
    // length of that tail plus one, so the terminating NUL moves too and
    // nothing beyond the end of the string is read.
    memmove(temp, temp + 1, strlen(temp + 1) + 1);
}
You'll need to repeat this loop again to remove the \t's.
Note: Both of these methods work in-place. This might not be safe! (Read Evan Teran's comments for details.) Also, these methods are not very efficient, although they do utilize a library function for some of the code instead of rolling your own.
Basically, you have two ways to do this: you can create a copy of the original string, minus all '\t' and '\n' characters, or you can strip the string "in-place." However, I bet money that the first option will be faster, and I promise you it will be safer.
So we'll make a function:
char *strip(const char *str, const char *d);
We want to use strlen() and malloc() to allocate a new char * buffer the same size as our str buffer. Then we go through str character by character. If the character is not contained in d, we copy it into our new buffer. We can use something like strchr() to see if each character is in the string d. Once we're done, we have a new buffer, with the contents of our old buffer minus characters in the string d, so we just return that. I won't give you sample code, because this might be homework, but here's the sample usage to show you how it solves your problem:
char *string = "some\n text\t to strip";
char *stripped = strip(string, "\t\n");
This is a c string function that will find any character in accept and return a pointer to that position or NULL if it is not found.
#include <string.h>
char *strpbrk(const char *s, const char *accept);
Example:
char search[] = "a string with \t and \n";
char *first_occ = strpbrk( search, "\t\n" );
first_occ will point to the \t, i.e. the 15th character in search (index 14). You can replace it and then call strpbrk again in a loop until all occurrences have been replaced.
I like to make the standard library do as much of the work as possible, so I would use something similar to Evan's solution but with strspn() and strcspn().
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SPACE " \t\r\n"
static void strip(char *s);
static char *strip_copy(char const *s);
/* Demonstrates both variants on the same sample: prints the string stripped
 * in place, then the stripped copy made before the in-place edit.
 * Returns 0 on success, EXIT_FAILURE if the copy could not be allocated. */
int main(int ac, char **av)
{
    (void)ac;   /* command-line arguments are not used */
    (void)av;

    char s[] = "this\t is\n a\t test\n test";

    char *s1 = strip_copy(s);
    if (s1 == NULL) {
        return EXIT_FAILURE;    /* passing NULL to %s would be undefined */
    }

    strip(s);
    printf("%s\n%s\n", s, s1);

    free(s1);   /* strip_copy hands ownership of the buffer to the caller */
    return 0;
}
/* Removes every character listed in SPACE from s, in place.
 * s must be a writable, NUL-terminated string. */
static void strip(char *s)
{
    char *p = s;    /* write position; never ahead of the read position s */

    while (*s)
    {
        /* length of the run of kept characters at s */
        size_t n = strcspn(s, SPACE);

        /* source and destination may overlap, which strncpy/memcpy do not
         * allow; memmove is defined for overlapping regions */
        memmove(p, s, n);
        p += n;

        /* step over the kept run and the SPACE run that follows it */
        s += n + strspn(s + n, SPACE);
    }
    *p = '\0';
}
/* Returns a newly malloc'd copy of s with every character listed in SPACE
 * removed, or NULL if the allocation fails. The caller must free() the
 * result. */
static char *strip_copy(char const *s)
{
    /* the result can never be longer than the input */
    char *buf = malloc(1 + strlen(s));
    if (buf == NULL)
    {
        return NULL;
    }

    char *p = buf;
    char const *q = s;
    while (*q)
    {
        /* copy the run of kept characters, then skip the SPACE run after it */
        size_t n = strcspn(q, SPACE);
        memcpy(p, q, n);
        p += n;
        q += n + strspn(q + n, SPACE);
    }
    *p++ = '\0';

    /* Shrink to the size actually used. If realloc fails the original block
     * is still valid and already holds the result, so return it instead of
     * losing (and leaking) it. */
    char *shrunk = realloc(buf, (size_t)(p - buf));
    return (shrunk != NULL) ? shrunk : buf;
}