C strtok, what if I want to keep the delimiter? - c

I'm trying to split lines of the type:
GM 1 2 3 ! this is a comment
to separate out the comment section. There are several possible comment delimiters, !, ' and #. strtok is the obvious solution for this:
card->card_str = strtok(line_buf, "!'#");
producing GM 1 2 3 and this is a comment. However, for this role, I need to keep the delimiter character in the second string, so in this case ! this is a comment. Is there an easy way to do this?

strtok is rarely the right tool for parsing jobs because it has many quirks and side effects.
For your goal, you can use strcspn():
void parse_input_line(const char *line) {
size_t len = strcspn(line, "!'#");
char *p = malloc(len + 1);
if (p != NULL) {
memcpy(p, line, len);
p[len] = '\0';
card->card_str = p;
card->card_comment = p[len] ? strdup(p + len) : NULL;
}
}
Alternately, you can use strpbrk:
void parse_input_line(const char *line) {
const char *sep = strpbrk(line, "!'#");
if (sep == NULL) {
// no comment
card->card_str = strdup(line);
card->card_comment = NULL;
} else {
size_t len = sep - line;
char *p = malloc(len + 1);
if (p != NULL) {
memcpy(p, line, len);
p[len] = '\0';
card->card_str = p;
card->card_comment = strdup(sep);
}
}
}
You can use strndup to make the code more readable:
void parse_input_line(const char *line) {
size_t len = strcspn(line, "!'#");
if (p[len] == '\0') {
/* no comment */
card->card_str = strdup(line);
card->card_comment = NULL;
} else {
card->card_str = strndup(line, len);
card->card_comment = strdup(p + len);
}
}
strndup may not be available on all systems, here is a simple implementation:
size_t strnlen(const char *s, size_t n) {
size_t len;
for (len = 0; len < n; len++) {
if (s[len] == '\0')
break;
}
return len;
}
char *strndup(const char *s, size_t n) {
size_t len = strnlen(s, n);
char *p = malloc(len + 1);
if (p != NULL) {
memcpy(p, s, len);
p[len] = '\0';
}
return p;
}

Related

Copying specific number of characters from a string to another

I have a variable length string that I am trying to divide from plus signs and study on:
char string[] = "var1+vari2+varia3";
for (int i = 0; i != sizeof(string); i++) {
memcpy(buf, string[0], 4);
buf[9] = '\0';
}
since variables are different in size I am trying to write something that is going to take string into loop and extract (divide) variables. Any suggestions ? I am expecting result such as:
var1
vari2
varia3
You can use strtok() to break the string by delimiter
char string[]="var1+vari2+varia3";
const char delim[] = "+";
char *token;
/* get the first token */
token = strtok(string, delim);
/* walk through other tokens */
while( token != NULL ) {
printf( " %s\n", token );
token = strtok(NULL, delim);
}
More info about the strtok() here: https://man7.org/linux/man-pages/man3/strtok.3.html
It seems to me that you don't just want to want to print the individual strings but want to save the individual strings in some buffer.
Since you can't know the number of strings nor the length of the individual string, you should allocate memory dynamic, i.e. use functions like realloc, calloc and malloc.
It can be implemented in several ways. Below is one example. To keep the example simple, it's not performance optimized in anyway.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
char** split_string(const char* string, const char* token, int* num)
{
assert(string != NULL);
assert(token != NULL);
assert(num != NULL);
assert(strlen(token) != 0);
char** data = NULL;
int num_strings = 0;
while(*string)
{
// Allocate memory for one more string pointer
char** ptemp = realloc(data, (num_strings + 1) * sizeof *data);
if (ptemp == NULL) exit(1);
data = ptemp;
// Look for token
char* tmp = strstr(string, token);
if (tmp == NULL)
{
// Last string
// Allocate memory for one more string and copy it
int len = strlen(string);
data[num_strings] = calloc(len + 1, 1);
if (data[num_strings] == NULL) exit(1);
memcpy(data[num_strings], string, len);
++num_strings;
break;
}
// Allocate memory for one more string and copy it
int len = tmp - string;
data[num_strings] = calloc(len + 1, 1);
if (data[num_strings] == NULL) exit(1);
memcpy(data[num_strings], string, len);
// Prepare to search for next string
++num_strings;
string = tmp + strlen(token);
}
*num = num_strings;
return data;
}
int main()
{
char string[]="var1+vari2+varia3";
// Split the string into dynamic allocated memory
int num_strings;
char** data = split_string(string, "+", &num_strings);
// Now data can be used as an array-of-strings
// Example: Print the strings
printf("Found %d strings:\n", num_strings);
for(int i = 0; i < num_strings; ++i) printf("%s\n", data[i]);
// Free the memory
for(int i = 0; i < num_strings; ++i) free(data[i]);
free(data);
}
Output
Found 3 strings:
var1
vari2
varia3
You can use a simple loop scanning the string for + signs:
char string[] = "var1+vari2+varia3";
char buf[sizeof(string)];
int start = 0;
for (int i = 0;;) {
if (string[i] == '+' || string[i] == '\0') {
memcpy(buf, string + start, i - start);
buf[i - start] = '\0';
// buf contains the substring, use it as a C string
printf("%s\n", buf);
if (string[i] == '\0')
break;
start = ++i;
} else {
i++;
}
}
Your code does not have any sense.
I wrote such a function for you. Analyse it as sometimes is good to have some code as a base
char *substr(const char *str, char *buff, const size_t start, const size_t len)
{
size_t srcLen;
char *result = buff;
if(str && buff)
{
if(*str)
{
srcLen = strlen(str);
if(srcLen < start + len)
{
if(start < srcLen) strcpy(buff, str + start);
else buff[0] = 0;
}
else
{
memcpy(buff, str + start, len);
buff[len] = 0;
}
}
else
{
buff[0] = 0;
}
}
return result;
}
https://godbolt.org/z/GjMEqx

Need help to split a string by delimiter (and keep the delimiter in the token list)

I want to split a string by a delimiter and keep the delimiter in the token list
I have a function that do the same thing as strtok but with a string delimiter (instead of a set of chars) but it doesn't keep the delimiter and can't take an array of delimiters as argument
This is a function that split a string into tokens as strtok does but taking a delimiter
static char *strtokstr(char *str, char *delimiter)
{
static char *string;
char *end;
char *ret;
if (str != NULL)
string = str;
if (string == NULL)
return string;
end = strstr(string, delimiter);
if (end == NULL) {
char *ret = string;
string = NULL;
return ret;
}
ret = string;
*end = '\0';
string = end + strlen(delimiter);
return ret;
}
I want to have a char **split(char *str, char **delimiters_list) that split a string by a set of delimiters and keep the delimiter in the token list
I think I also need a function to count the number of tokens so i can malloc the return of my split function
// delimiters is an array containing ["&&", "||" and NULL]
split("ls > file&&foo || bar", delimiters) should return an array containing ["ls > file", "&&", "foo ", "||", " bar"]
How that can be achieved ?
First, you have a memory error here :
static char *string;
if (str != NULL)
string = str;
if (string == NULL)
return string;
If stris NULL, string is not initialised and you use a uninitialised value in comparaison.
if you want copy a string, you must use the strdup function, the = will just copy the pointer and not the pointer content.
And here a way to do it :
#include <stdlib.h>
#include <string.h>
char *get_delimiters(char *str, char **delims)
{
for (int i = 0; delims[i]; i++)
if (!strncmp(str, delims[i], strlen(delims[i])))
return delims[i];
return NULL;
}
char **split(char *str, char **delimiters)
{
char *string = strdup(str);
char **result = NULL;
int n = 0;
char *delim = NULL;
for (int i = 0; string[i]; i++)
if (get_delimiters(string + i, delimiters))
n++;
result = malloc((n * 2 + 2) * sizeof(char *));
if (!result)
return NULL;
result[0] = string;
n = 1;
for (int i = 0; string[i]; i++) {
delim = get_delimiters(string + i, delimiters);
if (delim) {
string[i] = '\0';
result[n++] = delim;
result[n++] = string + i + strlen(delim);
}
}
result[n] = NULL;
return result;
}
result :
[0] 'ls > file'
[1] '&&'
[2] 'foo '
[3] '||'
[4] ' bar'
remember result and string are malloced, so you must free the result and result[0]
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);
int main()
{
char **split_str;
char *delimiters[] = {
"&&",
"||"
};
int rows_in_returned_array;
split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters, 2 , &rows_in_returned_array);
int i;
for (i = 0 ; i < rows_in_returned_array ; ++i)
{
printf("\n%s\n", split_str[i]);
}
return 0;
}
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
//temporary storage for array to be returned
char temp_store[100][200];
int row = 0;//row size of array that will be returned
char **split_str;
int i, j, k, l, mark = 0;
char temp[100];
for (i = 0 ; str[i] != '\0' ; ++i)
{
//Iterating through all delimiters to check if any is str
for (j = 0 ; j < number_of_delimiters ; ++j )
{
l = i;
for (k = 0 ; delimiters[j][k] != '\0' ; ++k)
{
if (str[i] != delimiters[j][k])
{
break;
}
++l;
}
//This means delimiter is in string
if (delimiters[j][k] == '\0')
{
//store the string before delimiter
strcpy(temp_store[row], &str[mark]);
temp_store[row ++][i - mark] = '\0';
//store string after delimiter
strcpy(temp_store[row], &str[i]);
temp_store[row ++][k] = '\0';
//mark index where this delimiter ended
mark = l;
//Set i to where delimiter ends and break so that outermost loop
//can iterate from where delimiter ends
i = l - 1;
break;
}
}
}
//store the string remaining
strcpy(temp_store[row++], &str[mark]);
//Allocate the split_str and store temp_store into it
split_str = (char **)malloc(row * sizeof(char *));
for (i=0 ; i < row; i++)
{
split_str[i] = (char *)malloc(200 * sizeof(char));
strcpy(split_str[i], temp_store[i]);
}
*number_of_rows_in_return_array = row;
return split_str;
}
This should probably work. Note that I have passed int * number_of_rows_in_return_array by ref because we need to know the row size of the retuned array.
I went into abstraction. First I created a "sentence" library, that allows for manipulating NULL terminated list of strings (char*). I wrote some initial accessors (sentence_init, sentence_size, sentence_free, sentence_add_str etc.).
Then I went to split, witch becomes really, really easy then - if a delimeter is found, add the string up the delimeter to the sentence and add the delimeter to the sentence. Then increment the string pointer position. If the delimeter is not found, add the remaining string to the sentence.
There is a real problem with double pointers tho, because char ** is not implicitly convertible to const char **. For production code, I would probably aim to refactor the code, and try to take const-correctness into account.
#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
/*
* sentence - list of words
*/
/* ----------------------------------------------------------- */
// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.
char **sentence_init(void) {
return NULL;
}
size_t sentence_size(char * const *t) {
if (t == NULL) return 0;
size_t i;
for (i = 0; t[i] != NULL; ++i) {
continue;
}
return i;
}
void sentence_free(char * const *t) {
if (t == NULL) return;
for (char * const *i = t; *i != NULL; ++i) {
free(*i);
}
free((void*)t);
}
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
for (char * const *i = t; *i != NULL; ++i) {
printf(fmt1, *i);
if (*(i + 1) != NULL) {
printf(delim);
}
}
printf(end);
}
void sentence_print(char * const *t) {
sentence_printex(t, "%s", " ", "\n");
}
void sentence_print_quote_words(char * const *t) {
sentence_printex(t, "'%s'", " ", "\n");
}
bool sentence_cmp_const(const char * const *t, const char * const *other) {
const char * const *t_i = t;
const char * const *o_i = other;
while (*t_i != NULL && o_i != NULL) {
if (strcmp(*t_i, *o_i) != 0) {
return false;
}
++t_i;
++o_i;
}
return *t_i == NULL && *o_i == NULL;
}
// thet's always funny, because "dupa" in my language means "as*"
char **sentence_add_strdupped(char **t, char *strdupped) {
const size_t n = sentence_size(t);
const size_t add = 1 + 1;
const size_t new_n = n + add;
void * const pnt = realloc(t, new_n * sizeof(char*));
if (pnt == NULL) goto REALLOC_FAIL;
// we have to have place for terminating NULL pointer
assert(new_n >= 2);
t = pnt;
t[new_n - 2] = strdupped;
t[new_n - 1] = NULL;
// ownership of str goes to t
return t;
// ownership of str stays in the caller
REALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_strlened(char **t, const char *str, size_t len) {
char *strdupped = malloc(len + 1);
if (strdupped == NULL) goto MALLOC_FAIL;
memcpy(strdupped, str, len);
strdupped[len] = '\0';
t = sentence_add_strdupped(t, strdupped);
if (t == NULL) goto SENTENCE_ADD_STRDUPPED_FAIL;
return t;
SENTENCE_ADD_STRDUPPED_FAIL:
free(strdupped);
MALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_str(char **t, const char *str) {
const size_t str_len = strlen(str);
return sentence_add_strlened(t, str, str_len);
}
/* ----------------------------------------------------------- */
/**
* Puff. Run strstr for each of the elements inside NULL delimeters dellist.
* If any returns not NULL, return the pointer as returned by strstr
* And fill dellist_found with the pointer inside dellist (can be NULL).
* Finally! A 3 star award is mine!
*/
char *str_find_any_strings(const char *str,
const char * const *dellist,
const char * const * *dellist_found) {
assert(str != NULL);
assert(dellist != NULL);
for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
const char *found = strstr(str, *i);
if (found != NULL) {
if (dellist_found != NULL) {
*dellist_found = i;
}
// __UNCONST(found)
return (char*)found;
}
}
return NULL;
}
/**
* Split the string str according to the list od delimeters dellist
* #param str
* #param dellist
* #return returns a dictionary
*/
char **split(const char *str, const char * const *dellist) {
assert(str != NULL);
assert(dellist != NULL);
char **sen = sentence_init();
while (*str != '\0') {
const char * const *del_pnt = NULL;
const char *found = str_find_any_strings(str, dellist, &del_pnt);
if (found == NULL) {
// we don't want an empty string to be the last...
if (*str != '\0') {
sen = sentence_add_str(sen, str);
if (sen == NULL) return NULL;
}
break;
}
// Puff, so a delimeter is found at &str[found - str]
const size_t idx = found - str;
sen = sentence_add_strlened(sen, str, idx);
if (sen == NULL) return NULL;
assert(del_pnt != NULL);
const char *del = *del_pnt;
assert(del != NULL);
assert(*del != '\0');
const size_t del_len = strlen(del);
sen = sentence_add_strlened(sen, del, del_len);
if (sen == NULL) return NULL;
str += idx + del_len;
}
return sen;
}
int main()
{
char **sentence = split("ls > file&&foo || bar", (const char*[]){"&&", "||", NULL});
assert(sentence != NULL);
sentence_print_quote_words(sentence);
printf("cmp = %d\n", sentence_cmp_const((void*)sentence, (const char*[]){"ls > file", "&&", "foo ", "||", " bar", NULL}));
sentence_free(sentence);
return 0;
}
The program will output:
'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1

Remove a substring from a string in C

I want to remove a particular substring from a string for example my main string is "ababccdabce" and I want to remove "abc" from it so it will become "abcde".
I just wanted to know if there is a predefined function in C to do that, and if not, how to do it?
There is no predefined function in C to remove a given substring from a C string, but you can write one using strstr and memmove. Note that if you remove the substring in place, you cannot use memcpy nor strcpy because these have undefined behavior if the source and destination arrays overlap.
Here is the code:
#include <string.h>
char *strremove(char *str, const char *sub) {
size_t len = strlen(sub);
if (len > 0) {
char *p = str;
while ((p = strstr(p, sub)) != NULL) {
memmove(p, p + len, strlen(p + len) + 1);
}
}
return str;
}
Note that the resulting string may contain the substring as is the case in your example.
Netherwire suggested an optimisation:
char *strremove(char *str, const char *sub) {
size_t len = strlen(sub);
if (len > 0) {
char *p = str;
size_t size = 0;
while ((p = strstr(p, sub)) != NULL) {
size = (size == 0) ? (p - str) + strlen(p + len) + 1 : size - len;
memmove(p, p + len, size - (p - str));
}
}
return str;
}
Further honing the code, I came up with an even more efficient version using the 2 finger-method: only copying the fragments between matches starting after the first match:
char *strremove(char *str, const char *sub) {
char *p, *q, *r;
if (*sub && (q = r = strstr(str, sub)) != NULL) {
size_t len = strlen(sub);
while ((r = strstr(p = r + len, sub)) != NULL) {
memmove(q, p, r - p);
q += r - p;
}
memmove(q, p, strlen(p) + 1);
}
return str;
}
Here is the same method without any calls to memmove:
char *strremove(char *str, const char *sub) {
char *p, *q, *r;
if (*sub && (q = r = strstr(str, sub)) != NULL) {
size_t len = strlen(sub);
while ((r = strstr(p = r + len, sub)) != NULL) {
while (p < r)
*q++ = *p++;
}
while ((*q++ = *p++) != '\0')
continue;
}
return str;
}

Find a replace a substring C

I am trying to do a find a replace but not just for strings but for substrings also. So the program I am working on looks for the word "bar" and append "foo" in front of any instance of "bar". So my approach is that instead of actually appending the string, I replace the whole string "bar" with "foobar". The code I have right now (not fully tested), should find and replace all occurrences of "bar" with "foobar". However, if there is a string that looks like "bar123abc", it does not replace it with "foobar123abc".
This is the code I have:
static void replaceAllString(char *buf, const char *orig, const char *replace)
{
int olen, rlen;
char *s, *d;
char *tmpbuf;
if (!buf || !*buf || !orig || !*orig || !replace)
return;
tmpbuf = malloc(strlen(buf) + 1);
if (tmpbuf == NULL)
return;
olen = strlen(orig);
rlen = strlen(replace);
s = buf;
d = tmpbuf;
while (*s) {
if (strncmp(s, orig, olen) == 0) {
strcpy(d, replace);
s += olen;
d += rlen;
}
else
*d++ = *s++;
}
*d = '\0';
strcpy(buf, tmpbuf);
free(tmpbuf);
}
Here's how I might do it:
static char *replaceAll(char *buf, int buflen, const char *orig, const char *replace) {
if (!buf || !*buf || !orig || !*orig || !replace) return buf;
int olen = strlen(orig), rlen = strlen(replace);
int max = strlen(buf) + 1;
if (olen < rlen) {
max = rlen * ((max / olen) + 1) + 1;
}
char *tmpbuf = malloc(max);
char *bp = buf, *tp = tmpbuf, *sp;
while (NULL != (sp = strstr(bp, orig))) {
int f = sp - bp;
memmove(tp, bp, f);
memmove(tp + f, replace, rlen);
tp += f + rlen;
bp += f + olen; // no recursive replacement
}
strcpy(tp, bp);
strncpy(buf, tmpbuf, buflen);
free(tmpbuf);
return buf;
}
char haystack[128] = "123bar456bar7ba8ar9bar0";
int main(int ac, char *av[]) {
printf("%s\n", replaceAll(haystack, sizeof haystack, "bar", "foobar"));
}
Note: passing buflen is NOT optional! You DO NOT write to memory buffers you don't know the length of. If I'm interviewing C programmers, this would be an instant "no hire". tmpbuf is allocated the length max, crudely calculated for the worst case (something like "barbarbar"). The heavy lifting here is done by strstr().

Delete space from string in ansi C

I'm trying to write simple function trim space in the string in ansi C.
My str_utis.h:
#include <string.h>
const char* trim_str(char *input_str);
My str_utils.c:
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
int i = 0;
for (i = 0; i < len - 1; i++){
if (input_str[i] == ' ')
;
else
str += input_str[i];
}
return str;
}
When i try to execute it i got segfault:
int main(int argc, char** argv) {
const char* a = trim_str("Hey this is string");
printf("%s", a);
return 0;
}
why is it wrong? how can i write it correctly?
Thank you.
You cannot modify a string literal. It's UB. Copy the string out and modify its contents. Change
char* str = NULL;
int len = strlen(input_str);
to
size_t len = strlen( input_str );
char *str = malloc( len + 1 );
and then proceed to copy out the non-whitespace contents.
str isn't allocated, and you can't use += to append to a string anyway. Read up on strncat.
Go through this. this will remove blank spaces and tabs from both ends of the input string.
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
str = (char *)malloc(len+1);
int i = 0;
while(i < len && (input_str[i]==' ' || input_str[i]=='\t')){
++i;
}
int j = 0;
while(i< len && input_str[i]!=' ' && input_str[i]!='\t'){
str[j]= input_str[i];
++j;
++i;
}
str[j] = '\0';
return str;
}
first of all you need to get the new string size without spaces:
You don't want to allocate big strings if you don't have to.
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
int i = 0;
int newSize = 0;
for (i = 0; i < len - 1; i++){
if (input_str[i] == ' ')
;
else
newSize++;
}
str = malloc( newSize+ 1 );
str[newSize] = '\0'
// put the code of the copy bytes here...
return str;
}
char *trimdupstr(char *str)
{
size_t len, src,dst;
char *new;
if (!str) return NULL;
len = strlen (str);
new = malloc(len+1);
if (!new) return NULL;
for(src=dst=0; new[dst] = str[src++]; ) {
if (new[dst] != ' ') dst++;
}
return new;
}
And this one also removes tabs and CR/LF's:
char *trimdupstr(char *str)
{
size_t len, src,dst;
char *new;
if (!str) return NULL;
len = strlen (str);
new = malloc(len+1);
for (src=dst=0; str[src]; dst += len) {
len = strspn(str+src, "\t \r\n" );
src += len;
len = strcspn(str+src, "\t \r\n" );
memcpy (new+dst, str+src, len);
src += len;
}
new[dst] = 0;
return new;
}
This is a pretty safe method.
void rtrim(char instrng[]) {
assert (instrng != NULL);
if (strlen(instrng) == 0) {
return;
}
while (instrng[strlen(instrng)-1] == '\n' || instrng[strlen(instrng)-1] == ' ' ) {
instrng[strlen(instrng)-1] = '\0';
}
return;
}

Resources