I want to remove a particular substring from a string. For example, my main string is "ababccdabce" and I want to remove "abc" from it, so that it becomes "abcde".
I just wanted to know if there is a predefined function in C to do that, and if not, how to do it?
There is no predefined function in C to remove a given substring from a C string, but you can write one using strstr and memmove. Note that if you remove the substring in place, you cannot use memcpy nor strcpy because these have undefined behavior if the source and destination arrays overlap.
Here is the code:
#include <string.h>
/*
 * Remove occurrences of `sub` from `str`, editing `str` in place.
 *
 * After each removal the search resumes at the position of the removed
 * match, so a match formed by joining the text on both sides is removed
 * too ("ababccdabce" minus "abc" becomes "abcde").
 *
 * An empty `sub` leaves `str` untouched. Returns `str`.
 */
char *strremove(char *str, const char *sub) {
    const size_t sub_len = strlen(sub);

    if (sub_len == 0) {
        return str;
    }
    for (char *hit = strstr(str, sub); hit != NULL; hit = strstr(hit, sub)) {
        char *tail = hit + sub_len;
        /* Regions overlap, so memmove is required; +1 carries the NUL. */
        memmove(hit, tail, strlen(tail) + 1);
    }
    return str;
}
Note that the resulting string may contain the substring as is the case in your example.
Netherwire suggested an optimisation:
/*
 * Remove occurrences of `sub` from `str` in place and return `str`.
 *
 * Same rescanning behaviour as the plain strstr/memmove loop, but the
 * total size of the string (terminator included) is measured once, on the
 * first match, and then reduced by strlen(sub) per later removal instead
 * of calling strlen on the tail every time.
 */
char *strremove(char *str, const char *sub) {
    const size_t sub_len = strlen(sub);
    size_t total = 0; /* bytes in str incl. NUL after the latest removal; 0 = not measured yet */
    char *hit = str;

    if (sub_len == 0) {
        return str;
    }
    while ((hit = strstr(hit, sub)) != NULL) {
        const size_t offset = (size_t)(hit - str);

        if (total == 0) {
            total = offset + strlen(hit + sub_len) + 1;
        } else {
            total -= sub_len;
        }
        /* Everything from the end of the match through the NUL slides down. */
        memmove(hit, hit + sub_len, total - offset);
    }
    return str;
}
Further honing the code, I came up with an even more efficient version using the 2 finger-method: only copying the fragments between matches starting after the first match:
/*
 * Remove the occurrences of `sub` found by a single left-to-right scan of
 * `str`, compacting the string in place. Returns `str`.
 *
 * Two-finger method: `src` walks the original text, `dst` trails behind
 * at the write position. Only the fragments between matches are moved,
 * and nothing is moved before the first match.
 */
char *strremove(char *str, const char *sub) {
    if (*sub == '\0') {
        return str;
    }

    char *match = strstr(str, sub);
    if (match == NULL) {
        return str;
    }

    const size_t sub_len = strlen(sub);
    char *dst = match;           /* next write position */
    char *src = match + sub_len; /* first byte after the latest match */

    while ((match = strstr(src, sub)) != NULL) {
        const size_t keep = (size_t)(match - src);

        memmove(dst, src, keep);
        dst += keep;
        src = match + sub_len;
    }
    /* Remaining tail, terminator included. */
    memmove(dst, src, strlen(src) + 1);
    return str;
}
Here is the same method without any calls to memmove:
/*
 * Two-finger removal of `sub` from `str` using plain byte copies instead
 * of memmove. The string is compacted in place; returns `str`.
 *
 * `in` reads the original text, `out` is the write position, and `next`
 * is the match currently being skipped.
 */
char *strremove(char *str, const char *sub) {
    if (!*sub) {
        return str;
    }

    char *next = strstr(str, sub);
    if (!next) {
        return str;
    }

    const size_t sub_len = strlen(sub);
    char *out = next;
    const char *in;

    for (;;) {
        in = next + sub_len;
        next = strstr(in, sub);
        if (next == NULL) {
            break;
        }
        /* Copy the fragment lying between two matches. */
        for (; in < next; in++) {
            *out++ = *in;
        }
    }
    /* Copy the tail, stopping after the terminator has been written. */
    do {
        *out++ = *in;
    } while (*in++ != '\0');
    return str;
}
Related
I am currently recoding strcat() from the standard C library and I have set up some checks to avoid overlap problems. The problem is that my program still enters the error handling.
Here is the code:
/*
 * Question code, kept as posted: append src to dest.
 *
 * Returns dest after copying, or NULL when either pointer is NULL, when
 * the start of one string lies inside the measured extent of the other,
 * or when the size check rejects the call.
 *
 * NOTE(review): as written the size check compares against the size of a
 * pointer, and dest_len is counted twice (see the comments below).
 */
char *my_strcat(char *restrict dest, const char *restrict src)
{
size_t dest_len = 0, src_len = 0;
char *p = dest;
/* Runs before the NULL check; my_strlen is handed src as-is. */
src_len = my_strlen(src);
if (!dest || !src)
return NULL;
dest_len = my_strlen(dest);
/* Reject src starting inside dest's current characters. */
if (src >= dest && src < dest + dest_len) {
return NULL;
}
/* Reject dest starting inside src's characters. */
if (dest >= src && dest < src + src_len) {
return NULL;
}
/* dest_len already holds my_strlen(dest); this walk adds the length again. */
while (*p != '\0') p++, dest_len++;
/* dest is a pointer parameter, so sizeof(dest) is the size of a pointer,
   not the size of the caller's array. */
if (dest_len + src_len + 1 > sizeof(dest))
return NULL;
/* Write position computed from the (doubled) dest_len. */
p = dest + dest_len;
while (*src != '\0')
*p++ = *src++;
*p = '\0';
return dest;
}
/*
 * Length of the NUL-terminated string s, not counting the terminator.
 * A NULL pointer is treated as an empty string and yields 0.
 */
size_t my_strlen(const char *s)
{
    if (s == NULL) {
        return 0;
    }

    const char *end = s;
    while (*end != 0) {
        ++end;
    }
    return (size_t)(end - s);
}
I tested this way :
/*
 * Test driver from the question: appends "Hello" to a 100-byte buffer
 * holding " world!" and prints both strings and the returned pointer.
 * argc and argv are not used.
 */
int main(int argc, char **argv)
{
const char *src = "Hello";
char dest[100] = " world!";
/* Two-argument call: the capacity of dest is not passed along. */
char *test = my_strcat(dest, src);
printf("Src : %s Dest : %s\n", src, dest);
/* test is printed both as a pointer and as a string; my_strcat can return NULL. */
printf("Return adress : %p, Value : %s\n", test, test);
return 0;
}
According to gdb :
if (src >= dest && src < dest + dest_len)
1: dest = 0x7fffffffda70 " world!"
2: src = 0x555555557004 "Hello"
3: dest_len = 0
4: src_len = 5
Output
Src : Hello Dest : world!
Return adress : (nil), Value : (null)
Do you see the problem?
Update
Following your suggestions I have modified the code like this:
/*
 * Question code (second attempt), kept as posted: append src to dest,
 * where d_size is the capacity of dest supplied by the caller.
 *
 * Returns dest, or NULL when a pointer is NULL, when the start of one
 * string lies inside the measured extent of the other, or when the size
 * check against d_size fails.
 *
 * NOTE(review): dest_len is still counted twice, so the copy begins at
 * dest + 2 * strlen(dest), past dest's terminator (see below).
 */
char *my_strcat(char *restrict dest, const char *restrict src, size_t d_size)
{
size_t dest_len = 0, src_len = 0;
char *p = dest;
if (!dest || !src)
return NULL;
src_len = my_strlen(src);
dest_len = my_strlen(dest);
/* Reject src starting inside dest's current characters. */
if (src >= dest && src < dest + dest_len) {
return NULL;
}
/* Reject dest starting inside src's characters. */
if (dest >= src && dest < src + src_len) {
return NULL;
}
/* dest_len already holds my_strlen(dest); this walk adds the length again. */
while (*p != '\0') p++, dest_len++;
if (dest_len + src_len + 1 > d_size)
return NULL;
/* With the doubled dest_len the write starts beyond the existing NUL,
   which stays in place, so dest still reads as the old string. */
p = dest + dest_len;
while (*src != '\0')
*p++ = *src++;
*p = '\0';
return dest;
}
And in the main : char *test = my_strcat(dest, src, sizeof(dest));
But it still doesn't work :
Src : Hello Dest : world!
Return adress : 0x7fff74bc5650, Value : world!
Having tried to guide toward understanding this problem, it seems best to present what should be working code (for study.) Sometimes too many words merely muddle the situation:
/*
 * Append src to the string in dest, whose total capacity is d_size bytes.
 *
 * Returns dest on success. Returns NULL, leaving dest untouched, when
 * either pointer is NULL or when the combined string plus terminator
 * would not fit in d_size bytes.
 */
char *my_strcat(char *restrict dest, const char *restrict src, size_t d_size) {
    if (dest == NULL || src == NULL) {
        return NULL;
    }

    const size_t extra = strlen(src);
    const size_t used = strlen(dest);

    if (used + extra + 1 > d_size) {
        return NULL;
    }

    /* Copy src including its terminator onto the end of dest. */
    char *out = dest + used;
    for (size_t i = 0; i <= extra; i++) {
        out[i] = src[i];
    }
    return dest;
}
/*
 * Demo driver: prints the two inputs, appends src to dest with the
 * size-checked my_strcat, and prints the result if the call succeeded.
 */
int main() {
    char dest[100] = " world!";
    const char *src = "Hello";

    printf("Src : %s Dest : %s\n", src, dest);

    char *test = my_strcat(dest, src, sizeof dest);
    if (test != NULL) {
        printf("Value : %s\n", test);
    }
    return 0;
}
Now, one can experiment by shrinking the size of dest to something larger than " world!" but smaller than " world!Hello"... Perhaps 9 bytes???
And, now that the concatenation should be working (into a big enough buffer), adding the code to ensure there is no overlap of the actual character arrays. Known is the size of dest, and the length of src is measured.
dest_len + src_len + 1 > sizeof(dest)
sizeof(dest) is the size of the pointer sizeof(char*). If you want to check if the dest will be overflowed, you have to pass the size as an argument. See strlcpy or strncpy.
src >= dest
Note that comparing pointers that do not point into the same array is technically invalid. To bring some breeze of validity, you can do (uintptr_t)src >= (uintptr_t)dest. See also: "How does pointer comparison work in C? Is it ok to compare pointers that don't point to the same array?", "Is comparing two pointers with < undefined behavior if they are both cast to an integer type?", "Why does comparing pointers with undefined behavior still give correct results?", etc.
I want to split a string by a delimiter and keep the delimiter in the token list
I have a function that does the same thing as strtok but with a string delimiter (instead of a set of chars). However, it doesn't keep the delimiter and can't take an array of delimiters as an argument.
This is a function that split a string into tokens as strtok does but taking a delimiter
/*
 * strtok-like tokenizer whose separator is a whole string.
 *
 * Pass the string to split on the first call and NULL on later calls.
 * Each call returns the next token and overwrites the first byte of the
 * separator that follows it with '\0'. The text after the last separator
 * is returned as the final token; the call after that returns NULL.
 *
 * The scan position is kept in a static variable between calls.
 */
static char *strtokstr(char *str, char *delimiter)
{
    static char *cursor;

    if (str != NULL) {
        cursor = str;
    }
    if (cursor == NULL) {
        return NULL;
    }

    char *token = cursor;
    char *hit = strstr(cursor, delimiter);

    if (hit == NULL) {
        /* Last token: nothing left for the next call. */
        cursor = NULL;
    } else {
        *hit = '\0';
        cursor = hit + strlen(delimiter);
    }
    return token;
}
I want to have a char **split(char *str, char **delimiters_list) that split a string by a set of delimiters and keep the delimiter in the token list
I think I also need a function to count the number of tokens so i can malloc the return of my split function
// delimiters is an array containing ["&&", "||" and NULL]
split("ls > file&&foo || bar", delimiters) should return an array containing ["ls > file", "&&", "foo ", "||", " bar"]
How that can be achieved ?
First, you have a memory error here :
static char *string;
if (str != NULL)
string = str;
if (string == NULL)
return string;
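If str is NULL on the very first call, string has never been assigned when it is compared. (Because string is static it starts out as NULL rather than holding garbage, so in that case the function simply returns NULL.)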
If you want to copy a string, you must use the strdup function; the = operator only copies the pointer, not the characters it points to.
And here a way to do it :
#include <stdlib.h>
#include <string.h>
/*
 * Return the first entry of the NULL-terminated list `delims` that `str`
 * starts with, or NULL when none matches. Empty delimiters are ignored,
 * since they would match at every position.
 */
char *get_delimiters(char *str, char **delims)
{
    for (int i = 0; delims[i]; i++) {
        const size_t len = strlen(delims[i]);

        if (len > 0 && strncmp(str, delims[i], len) == 0) {
            return delims[i];
        }
    }
    return NULL;
}

/*
 * Split `str` at every delimiter from the NULL-terminated list
 * `delimiters`, keeping the delimiters as tokens of their own.
 *
 * Returns a NULL-terminated array alternating text and delimiter:
 *   split("a&&b", {"&&", NULL}) -> { "a", "&&", "b", NULL }
 * Returns NULL if memory cannot be allocated.
 *
 * Ownership: text tokens point into one private copy of `str` whose start
 * is result[0]; delimiter tokens point at the caller's `delimiters`
 * entries. Release with free(result[0]) followed by free(result).
 *
 * Scanning continues after the end of each matched delimiter, so a
 * delimiter is never matched again inside the previous one ("a&&&b"
 * yields "a", "&&", "&b").
 */
char **split(char *str, char **delimiters)
{
    const size_t len = strlen(str);
    char *string = malloc(len + 1);

    if (!string) {
        return NULL;
    }
    memcpy(string, str, len + 1);

    /* Pass 1: count delimiters so the token array can be sized exactly. */
    size_t n = 0;
    for (size_t i = 0; string[i]; ) {
        char *delim = get_delimiters(string + i, delimiters);

        if (delim) {
            n++;
            i += strlen(delim);
        } else {
            i++;
        }
    }

    /* n delimiters + (n + 1) text pieces + terminating NULL. */
    char **result = malloc((n * 2 + 2) * sizeof *result);
    if (!result) {
        free(string);
        return NULL;
    }

    /* Pass 2: cut the copy at each delimiter and record the tokens. */
    size_t k = 0;
    result[k++] = string;
    for (size_t i = 0; string[i]; ) {
        char *delim = get_delimiters(string + i, delimiters);

        if (delim) {
            const size_t dlen = strlen(delim);

            string[i] = '\0';
            result[k++] = delim;
            result[k++] = string + i + dlen;
            i += dlen;
        } else {
            i++;
        }
    }
    result[k] = NULL;
    return result;
}
result :
[0] 'ls > file'
[1] '&&'
[2] 'foo '
[3] '||'
[4] ' bar'
remember result and string are malloced, so you must free the result and result[0]
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);
/*
 * Demo driver: splits a sample command line on "&&" and "||", prints one
 * token per line, and releases every row and the row array returned by
 * split().
 */
int main()
{
    char *delimiters[] = {
        "&&",
        "||"
    };
    /* Derive the count from the array instead of hard-coding it. */
    const int delimiter_count = (int)(sizeof delimiters / sizeof delimiters[0]);
    int rows_in_returned_array = 0;

    char **split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters,
                             delimiter_count, &rows_in_returned_array);
    if (split_str == NULL) {
        return EXIT_FAILURE;
    }

    for (int i = 0; i < rows_in_returned_array; ++i) {
        printf("\n%s\n", split_str[i]);
        free(split_str[i]);
    }
    free(split_str);
    return 0;
}
/* Length of the first non-empty delimiter that `s` starts with, or 0 if none. */
static size_t split_match_at(const char *s, char **delimiters, int number_of_delimiters)
{
    for (int j = 0; j < number_of_delimiters; ++j) {
        const size_t len = strlen(delimiters[j]);

        if (len > 0 && strncmp(s, delimiters[j], len) == 0) {
            return len;
        }
    }
    return 0;
}

/* Heap copy of the `len` bytes at `start`, NUL-terminated; NULL on failure. */
static char *split_copy_span(const char *start, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Split `str` at every occurrence of one of the `number_of_delimiters`
 * strings in `delimiters`, keeping each delimiter as its own row.
 *
 * The result alternates text and delimiter and always ends with the text
 * after the last delimiter (possibly empty), so k delimiters give 2k + 1
 * rows. The row count is stored in *number_of_rows_in_return_array.
 *
 * Every row and the row array are malloc'ed; the caller frees each row
 * and then the array. Rows are sized to their contents. On allocation
 * failure everything is released, the count is set to 0 and NULL is
 * returned.
 *
 * Delimiters are compared character by character against the input, so
 * multi-character delimiters such as "->" work. Empty delimiters are
 * ignored.
 */
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
    /* Pass 1: count delimiters to size the row array exactly. */
    size_t matches = 0;
    for (size_t i = 0; str[i] != '\0'; ) {
        const size_t len = split_match_at(str + i, delimiters, number_of_delimiters);

        if (len > 0) {
            matches++;
            i += len;
        } else {
            i++;
        }
    }

    size_t row = 0;
    char **split_str = malloc((2 * matches + 1) * sizeof *split_str);
    if (split_str == NULL) {
        goto fail;
    }

    /* Pass 2: copy out the text before each delimiter and the delimiter itself. */
    size_t mark = 0; /* start of the text piece currently being collected */
    size_t i = 0;
    while (str[i] != '\0') {
        const size_t len = split_match_at(str + i, delimiters, number_of_delimiters);

        if (len == 0) {
            i++;
            continue;
        }
        if ((split_str[row] = split_copy_span(str + mark, i - mark)) == NULL) {
            goto fail;
        }
        row++;
        if ((split_str[row] = split_copy_span(str + i, len)) == NULL) {
            goto fail;
        }
        row++;
        i += len;
        mark = i;
    }

    /* Whatever follows the last delimiter. */
    if ((split_str[row] = split_copy_span(str + mark, i - mark)) == NULL) {
        goto fail;
    }
    row++;

    *number_of_rows_in_return_array = (int)row;
    return split_str;

fail:
    while (row > 0) {
        free(split_str[--row]);
    }
    free(split_str);
    *number_of_rows_in_return_array = 0;
    return NULL;
}
This should probably work. Note that I have passed int *number_of_rows_in_return_array by reference because we need to know the row count of the returned array.
I went into abstraction. First I created a "sentence" library, that allows for manipulating NULL terminated list of strings (char*). I wrote some initial accessors (sentence_init, sentence_size, sentence_free, sentence_add_str etc.).
Then I went on to split, which becomes really, really easy: if a delimiter is found, add the string up to the delimiter to the sentence, and then add the delimiter itself to the sentence. Then advance the string pointer past the delimiter. If no delimiter is found, add the remaining string to the sentence.
There is a real problem with double pointers tho, because char ** is not implicitly convertible to const char **. For production code, I would probably aim to refactor the code, and try to take const-correctness into account.
#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
/*
* sentence - list of words
*/
/* ----------------------------------------------------------- */
// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.
/* Create an empty sentence. The empty sentence is represented by NULL. */
char **sentence_init(void) {
    char **empty = NULL;

    return empty;
}
/*
 * Number of words in the NULL-terminated sentence t, not counting the
 * terminating NULL pointer. A NULL sentence has size 0.
 */
size_t sentence_size(char * const *t) {
    size_t count = 0;

    if (t != NULL) {
        while (t[count] != NULL) {
            count++;
        }
    }
    return count;
}
/*
 * Release a sentence: free every word, then the pointer array itself.
 * Passing NULL (the empty sentence) does nothing.
 */
void sentence_free(char * const *t) {
    if (t != NULL) {
        size_t idx = 0;

        while (t[idx] != NULL) {
            free(t[idx]);
            idx++;
        }
        /* The cast only drops the const that the parameter type adds. */
        free((void*)t);
    }
}
/*
 * Print every word of sentence t to stdout.
 *
 *   fmt1  - printf format applied to each word; it must consume exactly
 *           one string argument (e.g. "%s" or "'%s'").
 *   delim - text written between consecutive words.
 *   end   - text written once after the last word.
 *
 * delim and end are written verbatim rather than being interpreted as
 * printf formats, so a '%' inside them is harmless. A NULL sentence is
 * the empty sentence: only `end` is written.
 */
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
    if (t != NULL) {
        for (char * const *i = t; *i != NULL; ++i) {
            printf(fmt1, *i);
            if (*(i + 1) != NULL) {
                fputs(delim, stdout);
            }
        }
    }
    fputs(end, stdout);
}
/* Print the words of t separated by single spaces, followed by a newline. */
void sentence_print(char * const *t) {
    static const char word_fmt[] = "%s";

    sentence_printex(t, word_fmt, " ", "\n");
}
/*
 * Print the words of t, each wrapped in single quotes, separated by
 * single spaces and followed by a newline.
 */
void sentence_print_quote_words(char * const *t) {
    static const char quoted_fmt[] = "'%s'";

    sentence_printex(t, quoted_fmt, " ", "\n");
}
/*
 * Compare two NULL-terminated sentences word by word.
 *
 * Returns true when both have the same number of words and every pair of
 * words compares equal with strcmp, false otherwise. A NULL sentence is
 * treated as the empty sentence.
 */
bool sentence_cmp_const(const char * const *t, const char * const *other) {
    static const char * const empty[] = { NULL };
    const char * const *t_i = (t != NULL) ? t : empty;
    const char * const *o_i = (other != NULL) ? other : empty;

    /* Both terminators are checked, so a shorter `other` is never read
       past its end and strcmp never receives NULL. */
    while (*t_i != NULL && *o_i != NULL) {
        if (strcmp(*t_i, *o_i) != 0) {
            return false;
        }
        ++t_i;
        ++o_i;
    }
    /* Equal only if both ran out at the same time. */
    return *t_i == NULL && *o_i == NULL;
}
// thet's always funny, because "dupa" in my language means "as*"
/*
 * Append the heap-allocated word `strdupped` to sentence t.
 *
 * On success the word is owned by the returned sentence, which must be
 * used in place of t. If the array cannot be grown, the whole sentence t
 * is freed and NULL is returned; `strdupped` then still belongs to the
 * caller.
 */
char **sentence_add_strdupped(char **t, char *strdupped) {
    const size_t old_count = sentence_size(t);
    /* Room for the new word plus the terminating NULL pointer. */
    const size_t new_count = old_count + 1 + 1;

    char **grown = realloc(t, new_count * sizeof(char*));
    if (grown == NULL) {
        sentence_free(t);
        return NULL;
    }

    assert(new_count >= 2);
    grown[new_count - 2] = strdupped;
    grown[new_count - 1] = NULL;
    return grown;
}
/*
 * Append a copy of the first `len` bytes of `str` to sentence t as a new
 * NUL-terminated word.
 *
 * Returns the updated sentence, to be used in place of t. On any
 * allocation failure the copy and the whole sentence are freed and NULL
 * is returned.
 */
char **sentence_add_strlened(char **t, const char *str, size_t len) {
    char *copy = malloc(len + 1);

    if (copy == NULL) {
        sentence_free(t);
        return NULL;
    }
    memcpy(copy, str, len);
    copy[len] = '\0';

    char **grown = sentence_add_strdupped(t, copy);
    if (grown == NULL) {
        /* sentence_add_strdupped already freed the sentence; only the
           copy is still ours. */
        free(copy);
        return NULL;
    }
    return grown;
}
/*
 * Append a copy of the whole NUL-terminated string `str` to sentence t.
 * Returns whatever sentence_add_strlened returns for the full length.
 */
char **sentence_add_str(char **t, const char *str) {
    return sentence_add_strlened(t, str, strlen(str));
}
/* ----------------------------------------------------------- */
/**
 * Find the earliest occurrence in str of any string from the
 * NULL-terminated list dellist.
 *
 * Every list entry is searched with strstr and the match that starts
 * first in str wins; when two entries match at the same position, the one
 * listed first wins.
 *
 * Returns a pointer to the start of that match inside str, or NULL when
 * no entry occurs. If dellist_found is not NULL and a match exists,
 * *dellist_found is set to the winning entry's slot in dellist; it is
 * left untouched when nothing matches.
 */
char *str_find_any_strings(const char *str,
        const char * const *dellist,
        const char * const * *dellist_found) {
    assert(str != NULL);
    assert(dellist != NULL);

    const char *best = NULL;
    const char * const *best_del = NULL;

    for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
        const char *found = strstr(str, *i);

        /* Keep the match closest to the start, not the first list entry
           that happens to occur somewhere. */
        if (found != NULL && (best == NULL || found < best)) {
            best = found;
            best_del = i;
        }
    }

    if (best != NULL && dellist_found != NULL) {
        *dellist_found = best_del;
    }
    /* Drops the const of str, as strstr itself does. */
    return (char*)best;
}
/**
 * Split str into a sentence, keeping the delimiters as words.
 *
 * str is consumed left to right. Each delimiter reported by
 * str_find_any_strings contributes two words: the text in front of it
 * (possibly empty) and the delimiter itself. Text left after the last
 * delimiter becomes the final word; nothing is added when str ends right
 * after a delimiter.
 *
 * Returns the sentence, which the caller releases with sentence_free.
 * Returns NULL when str is empty and also when an allocation fails.
 */
char **split(const char *str, const char * const *dellist) {
    assert(str != NULL);
    assert(dellist != NULL);

    char **words = sentence_init();
    const char *rest = str;

    while (*rest != '\0') {
        const char * const *hit_del = NULL;
        const char *hit = str_find_any_strings(rest, dellist, &hit_del);

        if (hit == NULL) {
            /* No delimiter left: the remainder is the last word. */
            return sentence_add_str(words, rest);
        }

        /* Text in front of the delimiter. */
        words = sentence_add_strlened(words, rest, (size_t)(hit - rest));
        if (words == NULL) {
            return NULL;
        }

        assert(hit_del != NULL);
        const char *del = *hit_del;
        assert(del != NULL);
        assert(*del != '\0');

        /* The delimiter itself. */
        const size_t del_len = strlen(del);
        words = sentence_add_strlened(words, del, del_len);
        if (words == NULL) {
            return NULL;
        }

        rest = hit + del_len;
    }
    return words;
}
/*
 * Demo driver: splits a sample command line on "&&" and "||", prints the
 * words in quotes, and prints whether they equal the expected word list.
 */
int main()
{
    const char *delimiters[] = {"&&", "||", NULL};
    const char *expected[] = {"ls > file", "&&", "foo ", "||", " bar", NULL};

    char **sentence = split("ls > file&&foo || bar", delimiters);
    assert(sentence != NULL);

    sentence_print_quote_words(sentence);

    const bool same = sentence_cmp_const((void*)sentence, expected);
    printf("cmp = %d\n", same);

    sentence_free(sentence);
    return 0;
}
The program will output:
'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1
I'm trying to split lines of the type:
GM 1 2 3 ! this is a comment
to separate out the comment section. There are several possible comment delimiters, !, ' and #. strtok is the obvious solution for this:
card->card_str = strtok(line_buf, "!'#");
producing GM 1 2 3 and this is a comment. However, for this role, I need to keep the delimiter character in the second string, so in this case ! this is a comment. Is there an easy way to do this?
strtok is rarely the right tool for parsing jobs because it has many quirks and side effects.
For your goal, you can use strcspn():
/*
 * Split `line` at the first comment character ('!', '\'' or '#').
 *
 * The text before the comment character is copied into a new allocation
 * stored in card->card_str. If a comment character is present, a copy of
 * the rest of the line, starting at that character, is stored in
 * card->card_comment; otherwise card->card_comment is set to NULL.
 *
 * If the first allocation fails, card is left untouched.
 * NOTE(review): `card` is declared outside this snippet; presumably the
 * record currently being filled. Confirm against the caller.
 */
void parse_input_line(const char *line) {
    size_t len = strcspn(line, "!'#");
    char *p = malloc(len + 1);

    if (p != NULL) {
        memcpy(p, line, len);
        p[len] = '\0';
        card->card_str = p;
        /* Test the input: p[len] was just set to '\0' and would always
           report "no comment". */
        card->card_comment = line[len] ? strdup(line + len) : NULL;
    }
}
Alternately, you can use strpbrk:
/*
 * Split `line` at the first comment character ('!', '\'' or '#'), located
 * with strpbrk.
 *
 * Without a comment, card->card_str receives a copy of the whole line and
 * card->card_comment is set to NULL. With a comment, card->card_str
 * receives a copy of the text before it and card->card_comment a copy of
 * the rest, starting at the comment character. If allocating the text
 * part fails in the comment case, card is left untouched.
 */
void parse_input_line(const char *line) {
    const char *sep = strpbrk(line, "!'#");

    if (sep != NULL) {
        const size_t code_len = (size_t)(sep - line);
        char *code = malloc(code_len + 1);

        if (code == NULL) {
            return;
        }
        memcpy(code, line, code_len);
        code[code_len] = '\0';
        card->card_str = code;
        card->card_comment = strdup(sep);
        return;
    }

    // no comment
    card->card_str = strdup(line);
    card->card_comment = NULL;
}
You can use strndup to make the code more readable:
/*
 * Split `line` at the first comment character ('!', '\'' or '#') using
 * strcspn and strndup.
 *
 * Without a comment, card->card_str receives a copy of the whole line and
 * card->card_comment is set to NULL. With a comment, card->card_str
 * receives a copy of the text before it and card->card_comment a copy of
 * the rest, starting at the comment character.
 *
 * The results of strdup/strndup are stored unchecked, so either field may
 * end up NULL if an allocation fails.
 */
void parse_input_line(const char *line) {
    size_t len = strcspn(line, "!'#");

    /* Index the input itself; this function has no other pointer. */
    if (line[len] == '\0') {
        /* no comment */
        card->card_str = strdup(line);
        card->card_comment = NULL;
    } else {
        card->card_str = strndup(line, len);
        card->card_comment = strdup(line + len);
    }
}
strndup may not be available on all systems, here is a simple implementation:
/*
 * Length of the string s, looking at no more than n bytes: returns the
 * index of the first '\0' among the first n bytes, or n if there is none.
 */
size_t strnlen(const char *s, size_t n) {
    size_t len = 0;

    while (len < n && s[len] != '\0') {
        len++;
    }
    return len;
}
/*
 * Return a newly allocated, NUL-terminated copy of at most n bytes of s.
 * Returns NULL if the allocation fails. The caller frees the result.
 */
char *strndup(const char *s, size_t n) {
    const size_t len = strnlen(s, n);
    char *copy = malloc(len + 1);

    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}
I had to write data to a CSV file in low level C code. I share the little snippet for cases external libraries like OpenCSV are not suitable.
To write to file instead of sprintf(s,...) use fprintf(f,...)
#include <assert.h>
#include <memory.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//replace string a with string b in str
//str must have enough space
/*
 * Replace every occurrence of string a with string b inside str, in place.
 *
 *   str  - buffer to edit; the caller guarantees room for the result.
 *   a, b - search and replacement strings.
 *   len  - kept for interface compatibility only; the length of the tail
 *          is measured on the current contents instead, because it
 *          changes with every replacement when lena != lenb.
 *   lena - length of a; must be positive, or nothing is replaced.
 *   lenb - number of bytes of b written per replacement.
 *
 * Replaced text is not searched again. Returns str.
 */
char* _replace(char* str, char* a, char* b, int len, int lena, int lenb)
{
    (void)len;
    if (str == NULL || a == NULL || b == NULL || lena <= 0 || lenb < 0)
        return str;

    for (char *p = strstr(str, a); p != NULL; p = strstr(p, a))
    {
        if (lena != lenb) // shift the tail (terminator included) as needed
            memmove(p + lenb, p + lena, strlen(p + lena) + 1);
        memcpy(p, b, (size_t)lenb);
        p += lenb;
    }
    return str;
}
//allocate space which must be free'd
//wrap in " and replace " by "" if necessary
/*
 * Return a newly allocated copy of str encoded as one CSV field for a
 * ';'-separated file.
 *
 * If str contains ';' or '"', the field is wrapped in double quotes and
 * every '"' inside it is doubled; otherwise the text is copied unchanged.
 *
 * Returns NULL if memory cannot be allocated. The caller frees the result.
 */
char* _csv_alloc(char* str)
{
    const size_t len = strlen(str);
    /* Worst case: every character doubled, two wrapping quotes, the NUL. */
    char *out = malloc(2 * len + 1 + 2);
    if (out == NULL)
        return NULL;

    const bool wrap = strchr(str, ';') != NULL || strchr(str, '\"') != NULL;
    char *w = out;

    if (wrap)
        *w++ = '\"';
    /* Single pass; a '"' can only occur when wrapping is on. */
    for (const char *r = str; *r != '\0'; r++)
    {
        if (*r == '\"')
            *w++ = '\"';
        *w++ = *r;
    }
    if (wrap)
        *w++ = '\"';
    *w = '\0';
    return out;
}
/*
 * Self-check: encodes three sample fields with _csv_alloc, joins them
 * into one ';'-separated line and asserts the expected CSV text.
 */
int main()
{
    char *plain = "Nothing to escape";
    char *with_separator = "Here the ; entails wrapping";
    char *with_quote = "Here the \" entails wrapping and escaping";

    char *field[3];
    field[0] = _csv_alloc(plain);
    field[1] = _csv_alloc(with_separator);
    field[2] = _csv_alloc(with_quote);

    char res[0xFF] = "";
    sprintf(res, "%s;%s;%s\n", field[0], field[1], field[2]);

    for (int i = 0; i < 3; i++)
        free(field[i]);

    assert(strcmp(res, "Nothing to escape;\"Here the ; entails wrapping\";\"Here the \"\" entails wrapping and escaping\"\n") ==0);
    return 0;
}
I am trying to do a find a replace but not just for strings but for substrings also. So the program I am working on looks for the word "bar" and append "foo" in front of any instance of "bar". So my approach is that instead of actually appending the string, I replace the whole string "bar" with "foobar". The code I have right now (not fully tested), should find and replace all occurrences of "bar" with "foobar". However, if there is a string that looks like "bar123abc", it does not replace it with "foobar123abc".
This is the code I have:
/*
 * Replace every occurrence of `orig` in `buf` with `replace`, in place.
 *
 * Matches are found left to right and never overlap; replaced text is not
 * searched again. This includes matches inside longer words, e.g. "bar"
 * in "bar123abc".
 *
 * Nothing happens when any pointer is NULL, when buf or orig is empty,
 * when orig does not occur, or when the temporary buffer cannot be
 * allocated.
 *
 * The capacity of buf is not known here: when `replace` is longer than
 * `orig` the caller must guarantee that buf can hold the grown result.
 */
static void replaceAllString(char *buf, const char *orig, const char *replace)
{
    if (!buf || !*buf || !orig || !*orig || !replace)
        return;

    const size_t olen = strlen(orig);
    const size_t rlen = strlen(replace);

    /* Count the matches first so the scratch buffer fits the result
       exactly; the input length is too small when the text grows. */
    size_t matches = 0;
    for (const char *s = buf; (s = strstr(s, orig)) != NULL; s += olen)
        matches++;
    if (matches == 0)
        return;

    const size_t newlen = strlen(buf) - matches * olen + matches * rlen;
    char *tmpbuf = malloc(newlen + 1);
    if (tmpbuf == NULL)
        return;

    const char *s = buf;
    char *d = tmpbuf;
    const char *hit;
    while ((hit = strstr(s, orig)) != NULL) {
        const size_t keep = (size_t)(hit - s);

        memcpy(d, s, keep);
        memcpy(d + keep, replace, rlen);
        d += keep + rlen;
        s = hit + olen;
    }
    strcpy(d, s);

    memcpy(buf, tmpbuf, newlen + 1);
    free(tmpbuf);
}
Here's how I might do it:
/*
 * Replace every occurrence of `orig` in `buf` with `replace`.
 *
 *   buf    - NUL-terminated string to edit in place.
 *   buflen - total capacity of buf in bytes.
 *
 * Matches are found left to right with strstr and replaced text is not
 * searched again. The result is built in a temporary buffer and copied
 * back; if it does not fit it is truncated to buflen - 1 characters. buf
 * is always NUL-terminated on return.
 *
 * buf is returned unchanged when an argument is NULL, buf or orig is
 * empty, buflen is not positive, orig does not occur, or the temporary
 * buffer cannot be allocated. Always returns buf.
 */
static char *replaceAll(char *buf, int buflen, const char *orig, const char *replace) {
    if (!buf || !*buf || !orig || !*orig || !replace || buflen <= 0) return buf;

    const size_t olen = strlen(orig), rlen = strlen(replace);
    const size_t cap = (size_t)buflen;

    /* Count matches so the temporary buffer is sized exactly. */
    size_t matches = 0;
    for (const char *s = buf; (s = strstr(s, orig)) != NULL; s += olen) {
        matches++;
    }
    if (matches == 0) return buf;

    const size_t newlen = strlen(buf) - matches * olen + matches * rlen;
    char *tmpbuf = malloc(newlen + 1);
    if (tmpbuf == NULL) return buf;

    const char *bp = buf, *sp;
    char *tp = tmpbuf;
    while (NULL != (sp = strstr(bp, orig))) {
        const size_t f = (size_t)(sp - bp);

        memcpy(tp, bp, f);
        memcpy(tp + f, replace, rlen);
        tp += f + rlen;
        bp = sp + olen; // no recursive replacement
    }
    strcpy(tp, bp);

    /* Bounded copy back that always terminates, unlike strncpy. */
    const size_t n = (newlen < cap) ? newlen : cap - 1;
    memcpy(buf, tmpbuf, n);
    buf[n] = '\0';

    free(tmpbuf);
    return buf;
}
/* Sample input; the buffer has spare room for the text to grow in place. */
char haystack[128] = "123bar456bar7ba8ar9bar0";

/* Demo driver: turn every "bar" in haystack into "foobar" and print the result. */
int main(int ac, char *av[]) {
    char *result = replaceAll(haystack, sizeof haystack, "bar", "foobar");

    printf("%s\n", result);
}
Note: passing buflen is NOT optional! You DO NOT write to memory buffers you don't know the length of. If I'm interviewing C programmers, this would be an instant "no hire". tmpbuf is allocated the length max, crudely calculated for the worst case (something like "barbarbar"). The heavy lifting here is done by strstr().