I need to use strtok to read in a first and last name and seperate it. How can I store the names where I can use them idependently in two seperate char arrays?
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="test string.";
char * test;
test = strtok (str," ");
while (test != NULL)
{
printf ("%s\n",test);
test= strtok (NULL, " ");
}
return 0;
}
Here is my take at a reasonably simple tokenize helper that
stores results in a dynamically growing array
null-terminating the array
keeps the input string safe (strtok modifies the input string, which is undefined behaviour on a literal char[], at least I think in C99)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** tokenize(const char* input)
{
char* str = strdup(input);
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
char* tok=strtok(str," ");
while(1)
{
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = tok? strdup(tok) : tok;
if (!tok) break;
tok=strtok(NULL," ");
}
free(str);
return result;
}
int main ()
{
char** tokens = tokenize("test string.");
char** it;
for(it=tokens; it && *it; ++it)
{
printf("%s\n", *it);
free(*it);
}
free(tokens);
return 0;
}
Here is a strtok-free reimplementation of that (uses strpbrk instead):
char** tokenize(const char* str)
{
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
const char* e=str;
if (e) do
{
const char* s=e;
e=strpbrk(s," ");
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = e? strndup(s, e-s) : strdup(s);
} while (e && *(++e));
if (count >= capacity)
result = realloc(result, (capacity+=1)*sizeof(*result));
result[count++] = 0;
return result;
}
Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.
That is we transform this:
char str[] ="test string.";
Into this:
char str[] ="test\0string.";
^ ^
| |
char *s1 ----- |
char *s2 -----------
.
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="test string.";
char *firstname = strtok(str, " ");
char *lastname = strtok(NULL, " ");
if (!lastname)
lastname = "";
printf("%s, %s\n", lastname, firstname);
return 0;
}
What about using strcpy:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_NAMES 2
int main ()
{
char str[] ="test string.";
char *names[MAX_NAMES] = { 0 };
char *test;
int i = 0;
test = strtok (str," ");
while (test != NULL && i < MAX_NAMES)
{
names[i] = malloc(strlen(test)+1);
strcpy(names[i++], test);
test = strtok (NULL, " ");
}
for(i=0; i<MAX_NAMES; ++i)
{
if(names[i])
{
puts(names[i]);
free(names[i]);
names[i] = 0;
}
}
return 0;
}
It contains much clutter to maintain a complete program and clean its resources, but the main point is to use strcpy to copy each token into its own string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** split(const char *str, const char *delimiter, size_t *len){
char *text, *p, *first, **array;
int c;
char** ret;
*len = 0;
text=strdup(str);
if(text==NULL) return NULL;
for(c=0,p=text;NULL!=(p=strtok(p, delimiter));p=NULL, c++)//count item
if(c==0) first=p; //first token top
ret=(char**)malloc(sizeof(char*)*c+1);//+1 for NULL
if(ret==NULL){
free(text);
return NULL;
}
strcpy(text, str+(first-text));//skip until top token
array=ret;
for(p=text;NULL!=(p=strtok(p, delimiter));p=NULL){
*array++=p;
}
*array=NULL;
*len=c;
return ret;
}
void free4split(char** sa){
char **array=sa;
if(sa!=NULL){
free(array[0]);//for text
free(sa); //for array
}
}
int main(void){
char str[] ="test string.";
char **words;
size_t len=0;
int i;
words = split(str, " \t\r\n,.", &len);
/*
for(char **wk = words; *wk ;wk++){
printf("%s\n", *wk);
}
*/
for(i = 0;i<len;++i){
printf("%s\n", words[i]);
}
free4split(words);
return 0;
}
/* result:
test
string
*/
Copy the results from strtok to a new buffer using a function such as
/*
* Returns a copy of s in freshly allocated memory.
* Exits the process if memory allocation fails.
*/
char *xstrdup(char const *s)
{
char *p = malloc(strlen(s) + 1);
if (p == NULL) {
perror("memory allocation failed");
exit(1);
}
strcpy(p, s);
return p;
}
Don't forget to free the return values when you're done with them.
IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:
#include <string.h>
#include <stdlib.h>
static char *make_str(char const *begin, char const *end) {
size_t len = end-begin;
char *ret = malloc(len+1);
if (ret != NULL) {
memcpy(ret, begin, len);
ret[len]='\0';
}
return ret;
}
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) {
int i;
char const *start=input, *end=start;
for (i=0; *start && i<max; i++) {
for ( ;NULL!=strchr(delims, *start); ++start)
;
for (end=start; *end && NULL==strchr(delims, *end); ++end)
;
tokens[i] = make_str(start, end);
start = end+1;
}
return i;
}
#ifdef TEST
#define MAX_TOKENS 10
int main() {
char *tokens[MAX_TOKENS];
int i;
size_t num = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");
for (i=0; i<num; i++) {
printf("|%s|\n", tokens[i]);
free(tokens[i]);
}
return 0;
}
#endif
U can do something like this too.
int main ()
{
char str[] ="test string.";
char * temp1;
char * temp2;
temp1 = strtok (str," ");
temp2 = strchr(str, ' ');
if (temp2 != NULL)
temp2++;
printf ("Splitted string :%s, %s\n" , temp1 , temp2);
return
}
Related
I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
char *strmalloc(char **string);
char *user_input = NULL;
char *word_array[];
int main(void) {
printf("Enter a sentence to find out the number of words: ");
user_input = strmalloc(&user_input);
return 0;
}
char *strmalloc(char **string) {
char *tmp = NULL;
size_t size = 0, index = 0;
int ch;
while ((ch = getchar()) != '\n' && ch != EOF) {
if (size <= index) {
size += 1;
tmp = realloc(*string, size);
if (!tmp) {
free(*string);
string = NULL;
break;
}
*string = tmp;
}
(*string)[index++] = ch;
}
return *string;
}
How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// Change this to change the separator characters
static inline char isSeparator(char ch) { return isspace(ch) || ispunct(ch); }
char * jumpSeparator(char *string) {
while(string[0] && isSeparator(string[0])) string++;
return string;
}
char * findEndOfWord(char *string) {
while (string[0] && !isSeparator(string[0])) string++;
return string;
}
int countWords(char *string) {
char * ptr = jumpSeparator(string);
if (strlen(ptr) == 0) return 0;
int count = 1;
while((ptr = findEndOfWord(ptr)) && ptr[0]) {
ptr = jumpSeparator(ptr);
if (!ptr) break;
count++;
}
return count;
}
int main() {
char * sentence = "This is,a function... to||count words";
int count = countWords(sentence);
printf("%d\n", count); //====> 7
}
EDIT : Reusing the same functions here is another example that allocates substrings dynamically :
int main() {
char * sentence = "This is,a function... to||split words";
int count = countWords(sentence);
char * ptr = sentence, *start, *end;
char ** substrings = malloc(count * sizeof(char *));
int i=0;
while((ptr = jumpSeparator(ptr)) && ptr[0]) {
start = ptr;
ptr = findEndOfWord(ptr);
end = ptr;
int len = end-start;
char * newString = malloc(len + 1);
memcpy(newString, start, len);
newString[len] = 0;
substrings[i++] = newString;
}
// Prints the result
for(int i=0; i<count; i++) printf("%s\n", substrings[i]);
// Frees the allocated memory
for(int i=0; i<count; i++) free(substrings[i]);
free(substrings);
return 0;
}
Output :
This
is
a
function
to
split
words
I am attempting to substitute parts of a string with keywords in C however when trying to make the string and passing it to the replace_str function it is not producing an output with any replacements whilst making it a static string allows the code to function.
//C default libraries
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *replace_str(char *str, char *orig, char *rep) //from https://www.linuxquestions.org/questions/programming-9/replace-a-substring-with-another-string-in-c-170076/
{
static char buffer[4096];
char *p;
if (!(p = strstr(str, orig))) // Is 'orig' even in 'str'?
return str;
strncpy(buffer, str, p - str); // Copy characters from 'str' start to 'orig' st$
buffer[p - str] = '\0';
sprintf(buffer + (p - str), "%s%s", rep, p + strlen(orig));
return buffer;
}
char* expanded(char *line)
{
char *keywords[] = { "online","blog" };
char *retval = "";
//char *buf = "[1]";
char buf[12];
for (int i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++)
{
snprintf(buf, 12, "[%d]", i);
retval = replace_str(line, buf, keywords[i]);
}
return retval;
}
int main()
{
char str[100];
gets(str);
printf(expanded(str));
getchar();
return 0;
}
When entering 'America[0]' I would expect 'Americaonline' to be returned however I only get America[0] back.
I am trying to split a line into an array of words, but I am stuck on how to do this in C. My skills in C aren't very good, so I can't think of a way to "execute" my idea. Her is what I have so far:
int beginIndex = 0;
int endIndex = 0;
int maxWords = 10;
while (1) {
while (!isspace(str)) {
endIndex++;
}
char *tmp = (string from 'str' from beginIndex to endIndex)
arr[wordCnt] = tmp;
wordCnt++;
beginIndex = endIndex;
if (wordCnt = maxWords) {
return;
}
}
In my method I receive (char *str, char *arr[10]), and str is the line that I want to split when I encounter a space. arr is the array where I want to store the words. Is there any way to copy the 'chunk' of string that I want from 'str' into my tmp variable? This is the best way that I can think of right now, perhaps it's a terrible idea. If so, I would be happy to get some documentation or tips on a better method.
You should check out the C Library function strtok. You simply feed it the string you want to break up and a string of delimiters.
Here is an example of how it works (taken from the linked site):
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL) {
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
In your case instead of printing each string you would assign the pointer returned by strtok to the next element in your array arr.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int split(char *str, char *arr[10]){
int beginIndex = 0;
int endIndex;
int maxWords = 10;
int wordCnt = 0;
while(1){
while(isspace(str[beginIndex])){
++beginIndex;
}
if(str[beginIndex] == '\0')
break;
endIndex = beginIndex;
while (str[endIndex] && !isspace(str[endIndex])){
++endIndex;
}
int len = endIndex - beginIndex;
char *tmp = calloc(len + 1, sizeof(char));
memcpy(tmp, &str[beginIndex], len);
arr[wordCnt++] = tmp;
beginIndex = endIndex;
if (wordCnt == maxWords)
break;
}
return wordCnt;
}
int main(void) {
char *arr[10];
int i;
int n = split("1st 2nd 3rd", arr);
for(i = 0; i < n; ++i){
puts(arr[i]);
free(arr[i]);
}
return 0;
}
Is there a better of parsing the below string instead of doing a strtok() to get each field.
"subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000"
Basically I want to retrieve the value for each field into another char buf's.
Here is my code. Just wanted to know if there is any other better way of doing it (any better string parsing algos)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#define SUBJECT "subject="
#define CC_LIST "cc="
#define SERVER "server="
static void
get_value (const char *tok, char **rval_buf, size_t field_len)
{
size_t val_size = 0;
if (!tok || !rval_buf)
return;
val_size = strlen(tok + field_len) + 1;
*rval_buf = calloc(1, val_size);
if (*rval_buf) {
strlcpy(*rval_buf, tok + field_len, val_size);
}
}
int
main (int argc, char **argv)
{
/* hard coded buf for testing */
char buf[] = "subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000";
char *subject_text = NULL;
char *cc_list = NULL;
char *server_addr = NULL;
char *tok = NULL;
int field_len = 0;
int val_len = 0;
tok = strtok(buf, "&");
while(tok) {
/*
* Handle the token
*/
/* check if it is subject */
if (strstr(tok, SUBJECT)) {
get_value(tok, &subject_text, strlen(SUBJECT));
} else if (strstr(tok, CC_LIST)) { /* check if it is CC */
get_value(tok, &cc_list, strlen(CC_LIST));
} else if (strstr(tok, SERVER)) { /* check if it is server */
get_value(tok, &server_addr, strlen(SERVER));
}
tok = strtok(NULL, "&");
}
/* dump data */
fprintf(stdout, "\nSUBJECT: \"%s\"\nCC_LIST: \"%s\"\nSERVER: \"%s\" \n\n",
subject_text, cc_list, server_addr);
return EXIT_SUCCESS;
}
strstr searches for one string ("the needle") inside another ("the haystack"), but you really only want to know whether the needle is the beginning of the haystack.
Here's a small suggestion: (requires #include <stdbool> or change the booleans to ints. I like bools.)
static bool
getval(const char* haystack, const char** res, const char* needle, size_t len) {
if (haystack && 0 == strncmp(haystack, needle, len)) {
*res = strdup(haystack + len);
return true;
}
return false;
}
and later:
for (tok = strtok(buf, "&"); tok; tok = strtok(NULL, "&")) {
getval(tok, &subject_text, SUBJECT, strlen(SUBJECT)) ||
getval(tok, &cc_list, CC_LIST, strlen(CC_LIST)) ||
getval(tok, &server_addr, SERVER, strlen(SERVER));
}
You can actually get away with doing the strlen inside of getval, which cuts down a lot on the noise, because most modern compilers are clever enough to inline getval and constant-fold the length of a constant string.
Use strtok()
char *strtok(char *str, const char *delim)
You can put '&' as a delimeter
I wrote a quick-n-dirty splitter for you:
int split(char* input, char delim, char*** parts)
{
int count = 1;
char** result;
char* t = input;
while(*t != '\0')
{
if (*t++ == delim)
{
count++;
}
}
result = (char**)malloc(count * sizeof(char*));
t = input;
int i = 0;
result[i] = input;
while(*t != '\0')
{
if (*t == delim)
{
*t = '\0';
result[++i] = ++t;
}
else
{
t++;
}
}
*parts = result;
return count;
}
int main()
{
char raw[] = "subject=\"some text\"&cc=abcd&server=acd.com";
char* str = _strdup(raw);
char** parts;
char** keyval;
int cnt = split(str, '&', &parts);
for(int i=0; i<cnt; ++i)
{
split(parts[i], '=', &keyval);
printf("[%d]: %s <--> %s\n", i, keyval[0], keyval[1]);
free(keyval);
}
free(parts);
getchar();
return 0;
}
Output
[0]: subject <--> "some text"
[1]: cc <--> abcd
[2]: server <--> acd.com
I have a text like this:
char* str="Hi all.\nMy name is Matteo.\n\nHow are you?"
and I want to split the string by "\n\n" in to an array like this:
char* array[3];
array[0]="Hi all.\nMy name is Matteo."
array[1]="How are you?"
array[2]=NULL
I've tried the strtok function but it does not split the string correctly.
#include <stdio.h>
#include <string.h>
int main(){
char *str="Hi all.\nMy name is Matteo.\n\nHow are you?";
char *array[3];
char *ptop, *pend;
char wk[1024];//char *wk=malloc(sizeof(char)*(strlen(str)+3));
int i, size = sizeof(array)/sizeof(char*);
/*
array[0]="Hi all.\nMy name is Matteo."
array[1]="How are you?"
array[2]=NULL
*/
strcpy(wk, str);
strcat(wk, "\n\n");
for(i=0, ptop=wk;i<size;++i){
if(NULL!=(pend=strstr(ptop, "\n\n"))){
*pend='\0';
array[i]=strdup(ptop);
ptop=pend+2;
} else {
array[i]=NULL;
break;
}
}
for(i = 0;i<size;++i)
printf("array[%d]=\"%s\"\n", i, array[i]);
return 0;
}
The strtok() function works on a set of single character delimiters. Your goal is to split by a two character delimiter, so strtok() isn't a good fit.
You could scan your input string via a loop that used strchr to find newlines and then checked to see if the next char was also a newline.
A more generic method based on strstr function:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
int main(void) {
char* str="Hi all.\nMy name is Matteo.\n\nHow are you?\n\nThanks";
char **result = NULL;
unsigned int index = 0;
unsigned int i = 0;
size_t size = 0;
char *ptr, *pstr;
ptr = NULL;
pstr = str;
while(pstr) {
ptr = strstr(pstr, "\n\n");
result = realloc(result, (index + 1) * sizeof(char *));
size = strlen(pstr) - ((ptr)?strlen(ptr):0);
result[index] = malloc(size * sizeof(char));
strncpy(result[index], pstr, size);
index++;
if(ptr) {
pstr = ptr + 2;
} else {
pstr = NULL;
}
} ;
for(i = 0; i < index; i++) {
printf("Array[%d] : >%s<\n", i, result[i]);
}
return 0;
}