IOT instruction when converting string to word array - arrays

I'm coming back to you about my function char **my_str_to_word_array(char *str). The purpose is to separate the string at each non-printable ASCII character and include the above in a new row of the double dimensional array.
Non-printable ASCII characters should be used as separators and should not be included in the line.
Example:
char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive." ;
char **array = my_str_to_word_array(test) ;
array[0] = "My name is John Doe." (zero terminated string)
array[1] = "I have 0 GPA." (zero terminated string)
array[2] = "I will survive." (zero terminated string)
array[3] = NULL
I have 2 problems:
If in my test main() I have a printf() below the call to my_str_to_word_array, the format passed to printf() will be included in the array. So I conclude that there is a memory read error.
When I try to free() the array I get an error :
double free or corruption (out)
[1] 33429 IOT instruction (core dumped) ./libmy
size_t get_words_number(char const *str)
{
size_t count = 0;
const char *i = str;
while (*i != 0) {
if (isprint(*i)) {
count++;
}
while (*i != 0 && isprint(*i)) {
i++;
}
i++;
}
return count;
}
char **free_corrupted_array(char **array, size_t i)
{
size_t j = 0;
while (j < i) {
free(array[j]);
j++;
}
free(array);
return NULL;
}
char **fill_array(char **array, const char *str, size_t word_count)
{
size_t word_size = 0, j = 0;
const char *i = str;
while (j < word_count) {
while (*i != 0 && isprint(*i)) {
word_size++;
i++;
}
array[j] = strndup(i - word_size, word_size);
if (!array[j]) {
return free_corrupted_array(array, j);
}
word_size = 0;
j++;
while (!isprint(*i)) {
i++;
}
}
array[j] = NULL;
return array;
}
char **my_str_to_word_array(char const *str)
{
char **word_array = NULL;
size_t word_count = 0;
if (!str) {
return NULL;
}
word_count = get_words_number(str);
word_array = malloc(word_count * sizeof(char *));
if (!word_array) {
return NULL;
}
word_array = fill_array(word_array, str, word_count);
return word_array;
}
void my_free_word_array(char **word_array)
{
if (!word_array) {
return;
}
while (*word_array != NULL) {
free(*word_array);
word_array++;
}
free(word_array);
}
int main(int argc, char **argv)
{
const char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive.";
char **word_array = my_str_to_word_array(test);
while (*word_array != NULL) {
printf("%s\n", *word_array);
word_array++;
}
printf("Test print original size %lu\n", strlen(test));
my_free_word_array(word_array);
return 0;
}
And the output :
My name is John Doe.
I have 0 GPA.
I will survive.
Test print original size %lu
Test print original size 50
double free or corruption (out)
[1] 33429 IOT instruction (core dumped) ./libmy
Do you see the problem?

Errors:
get_words_number goes out of bounds (off by one) and may read arbitrary memory after your string (check with the example I included in main).
You need an additional slot in your array to put there a terminating NULL.
Stop thrashing your input pointer if you later need it (both in my_free_word_array and in the printing loop in main).
EDITED: as Fe2O3 commented, I missed another bug in the fill_array function. You should also ensure that *i!=0 in the last loop.
Suggestions:
Next time make a Minimal, Reproducible Example by including all required headers;
strndup is not standard (unless you have __STDC_ALLOC_LIB__ and define __STDC_WANT_LIB_EXT2__ to 1).
You don't need the free_corrupted_array function at all.
EDITED: it's useless to check that *i!=0 if you are also checking that isprint(*i). 0 is not printable, so no need for the first check.
#define _CRT_SECURE_NO_WARNINGS
#ifdef __STDC_ALLOC_LIB__
#define __STDC_WANT_LIB_EXT2__ 1
#else
#include <stdlib.h>
#include <string.h>
char *strndup(const char *str, size_t size)
{
return strncpy(calloc(size + 1, 1), str, size);
}
#endif
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
size_t get_words_number(char const *str)
{
size_t count = 0;
const char *i = str;
while (*i != 0) {
if (isprint(*i)) {
count++;
}
while (*i != 0 && isprint(*i)) {
i++;
}
if (*i != 0) { // <--- This was missing
i++;
}
}
return count;
}
void my_free_word_array(char **word_array) // <--- Moved up
{
if (!word_array) {
return;
}
for (size_t i = 0; word_array[i] != NULL; ++i) { // <--- Stop thrashing word_array
free(word_array[i]);
}
free(word_array);
}
char **fill_array(char **array, const char *str, size_t word_count)
{
size_t word_size = 0, j = 0;
const char *i = str;
while (j < word_count) {
while (*i != 0 && isprint(*i)) {
word_size++;
i++;
}
array[j] = strndup(i - word_size, word_size);
if (!array[j]) {
my_free_word_array(array); // <--- No need for another free here
return NULL;
}
word_size = 0;
j++;
while (*i != 0 && !isprint(*i)) {
i++;
}
}
array[j] = NULL;
return array;
}
char **my_str_to_word_array(char const *str)
{
char **word_array = NULL;
size_t word_count = 0;
if (!str) {
return NULL;
}
word_count = get_words_number(str);
word_array = malloc((word_count + 1) * sizeof(char *)); // <--- You need a +1 here
if (!word_array) {
return NULL;
}
word_array = fill_array(word_array, str, word_count);
return word_array;
}
int main(int argc, char **argv)
{
char test[] = "My name is John Doe.\nI have 0 GPA.\nI will survive.\nThis will be removed from the string";
*strrchr(test,'\n') = 0;
char **word_array = my_str_to_word_array(test);
if (word_array) {
for (size_t i = 0; word_array[i] != NULL; ++i) { // <--- Stop thrashing word_array
printf("%s\n", word_array[i]);
}
printf("Test print original size %zu\n", strlen(test));
my_free_word_array(word_array);
}
return 0;
}

OP's code missed a check for a null character. #Costantino Grana
Candidate get_words_number() correction and simplification:
Count transitions from "non-word" to "word".
Use unsigned char* for defined use for all characters in is...() functions.
#include <ctype.h>
#include <stdbool.h>
size_t get_words_number(char const *str) {
const unsigned char *ustr = (const unsigned char *) str;
size_t count = 0;
bool previous_not_a_word = true;
while (*ustr) {
count += previous_not_a_word && isprint(*ustr);
previous_not_a_word = !isprint(*ustr);
ustr++;
}
return count;
}

The biggest problem with the OP code is that it has been fractured into so many helper functions that it is almost unreadable. A simple process has been muddied by this fragmentation.
Below is a "single pass" version that solves this problem. It does not involve helper functions with their parameters and maddening variable names.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
int main() {
// OP string still as "string literal"
char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive.";
// mutable copy of that string
char *copy = malloc( strlen( test ) + 1 ); // Verify!
strcpy( copy, test );
// get one element that is NULL
size_t cnt = 0;
char **arr = calloc( ++cnt, sizeof *arr ); // Verify!!
// chop copy on separators
char *cp = copy;
while( *cp ) {
// skip leading/trailing separators
while( *cp && !isprint( *cp ) ) cp++;
if( !*cp ) break; // was 1 or more trailing seps
// search for end of segment
char *ep = cp;
while( isprint( *ep ) ) ep++;
// remember if this is the final segment
bool atEnd = *ep == '\0';
// terminate segment and store pointer
*ep = '\0';
arr = realloc( arr, ++cnt * sizeof *arr ); // Verify!!!
arr[ cnt-2 ] = cp;
arr[ cnt-1 ] = NULL;
// move on (only if there is more to examine).
cp = ep + !atEnd;
}
// output
cnt = 0;
do
printf( "%d: %s\n", cnt, arr[ cnt ] ? arr[ cnt ] : "END OF ARRAY" );
while( arr[ cnt++ ] );
// cleanup
free( arr );
free( copy );
return 0;
}
The crucial verifications of return values from the heap allocation functions have been deliberately omitted to improve the clarity of this example code. Those verifications are left as an exercise for the reader.

Related

string splitter in C - dynamic memory

I am trying to enhance the string splitter by splits on : char. Original version can be found at string splitter - how is it working
I do not want to use MAX_TOKEN_SIZE, I want the buffer to be just enough to hold each token. I added malloc and realloc as follows but I am getting free(): double free detected in tcache 2 error which I do not understand. How am I double freeing ? Thanks for all your help.
PS: Based on Gerhardh's comments, I modified the code as follows, but now I am getting segfault.
PS: Based on user3121023's comments, I added parenthesis around *token in 2 places and it works now.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
# define GROWBY 32
const char* splitter(const char *str, char delimiter, char **token) {
size_t i = 0;
size_t buflen = 32;
while (*str) {
if ( i == buflen) {
buflen += GROWBY;
printf("gowing buffer\n");
char *new_token = realloc(*token, buflen * sizeof **token);
if (new_token == NULL){
fprintf(stderr, "Out of Memory");
abort();
}
*token = new_token; //set the new pointer to old pointer
}
char c = *(str++);
if (c == delimiter)
break;
(*token)[i++] = c; //user3121023
}
(*token)[i] = '\0'; /* set the null terminator, user3121023 */
return str;
}
int main(){
const char *env =
"/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";
while (*env){
char *token = malloc(GROWBY * sizeof(char));
env = splitter(env, ':', &token);
if (token[0] == '\0') {
strcpy(token, "./");
}
printf("%s\n", token) ;
free(token);
}
return 0;
}
Try using strcspn to advance to the next delimiter.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
const char* splitter(const char *str, char *delimiter, char **token) {
size_t buflen = 0;
size_t extra = 0;
buflen = strcspn ( str, delimiter); // characters to next delimiter
extra = 1;
if ( ! buflen) {
extra = 3; // need space for "./" copy in main
}
char *new_token = realloc(*token, ( buflen + extra) * sizeof **token);
if (new_token == NULL){
fprintf(stderr, "Out of Memory");
abort();
}
*token = new_token; //set the new pointer to old pointer
strncpy ( *token, str, buflen);
(*token)[buflen] = 0;
str += buflen;
if ( *str) {
str += 1;
}
return str;
}
int main(void){
const char *env =
"/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";
while (*env){
char *token = NULL;
env = splitter(env, ":", &token); // note " instead of '
if (token[0] == '\0') {
strcpy(token, "./");
}
printf("%s\n", token) ;
free(token);
}
return 0;
}

Dynamic memory allocation for an array of pointers to char in C

I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
char *strmalloc(char **string);
char *user_input = NULL;
char *word_array[];
int main(void) {
printf("Enter a sentence to find out the number of words: ");
user_input = strmalloc(&user_input);
return 0;
}
char *strmalloc(char **string) {
char *tmp = NULL;
size_t size = 0, index = 0;
int ch;
while ((ch = getchar()) != '\n' && ch != EOF) {
if (size <= index) {
size += 1;
tmp = realloc(*string, size);
if (!tmp) {
free(*string);
string = NULL;
break;
}
*string = tmp;
}
(*string)[index++] = ch;
}
return *string;
}
How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// Change this to change the separator characters
static inline char isSeparator(char ch) { return isspace(ch) || ispunct(ch); }
char * jumpSeparator(char *string) {
while(string[0] && isSeparator(string[0])) string++;
return string;
}
char * findEndOfWord(char *string) {
while (string[0] && !isSeparator(string[0])) string++;
return string;
}
int countWords(char *string) {
char * ptr = jumpSeparator(string);
if (strlen(ptr) == 0) return 0;
int count = 1;
while((ptr = findEndOfWord(ptr)) && ptr[0]) {
ptr = jumpSeparator(ptr);
if (!ptr) break;
count++;
}
return count;
}
int main() {
char * sentence = "This is,a function... to||count words";
int count = countWords(sentence);
printf("%d\n", count); //====> 7
}
EDIT : Reusing the same functions here is another example that allocates substrings dynamically :
int main() {
char * sentence = "This is,a function... to||split words";
int count = countWords(sentence);
char * ptr = sentence, *start, *end;
char ** substrings = malloc(count * sizeof(char *));
int i=0;
while((ptr = jumpSeparator(ptr)) && ptr[0]) {
start = ptr;
ptr = findEndOfWord(ptr);
end = ptr;
int len = end-start;
char * newString = malloc(len + 1);
memcpy(newString, start, len);
newString[len] = 0;
substrings[i++] = newString;
}
// Prints the result
for(int i=0; i<count; i++) printf("%s\n", substrings[i]);
// Frees the allocated memory
for(int i=0; i<count; i++) free(substrings[i]);
free(substrings);
return 0;
}
Output :
This
is
a
function
to
split
words

Need help to split a string by delimiter (and keep the delimiter in the token list)

I want to split a string by a delimiter and keep the delimiter in the token list
I have a function that do the same thing as strtok but with a string delimiter (instead of a set of chars) but it doesn't keep the delimiter and can't take an array of delimiters as argument
This is a function that split a string into tokens as strtok does but taking a delimiter
static char *strtokstr(char *str, char *delimiter)
{
static char *string;
char *end;
char *ret;
if (str != NULL)
string = str;
if (string == NULL)
return string;
end = strstr(string, delimiter);
if (end == NULL) {
char *ret = string;
string = NULL;
return ret;
}
ret = string;
*end = '\0';
string = end + strlen(delimiter);
return ret;
}
I want to have a char **split(char *str, char **delimiters_list) that split a string by a set of delimiters and keep the delimiter in the token list
I think I also need a function to count the number of tokens so i can malloc the return of my split function
// delimiters is an array containing ["&&", "||" and NULL]
split("ls > file&&foo || bar", delimiters) should return an array containing ["ls > file", "&&", "foo ", "||", " bar"]
How that can be achieved ?
First, you have a memory error here :
static char *string;
if (str != NULL)
string = str;
if (string == NULL)
return string;
If stris NULL, string is not initialised and you use a uninitialised value in comparaison.
if you want copy a string, you must use the strdup function, the = will just copy the pointer and not the pointer content.
And here a way to do it :
#include <stdlib.h>
#include <string.h>
char *get_delimiters(char *str, char **delims)
{
for (int i = 0; delims[i]; i++)
if (!strncmp(str, delims[i], strlen(delims[i])))
return delims[i];
return NULL;
}
char **split(char *str, char **delimiters)
{
char *string = strdup(str);
char **result = NULL;
int n = 0;
char *delim = NULL;
for (int i = 0; string[i]; i++)
if (get_delimiters(string + i, delimiters))
n++;
result = malloc((n * 2 + 2) * sizeof(char *));
if (!result)
return NULL;
result[0] = string;
n = 1;
for (int i = 0; string[i]; i++) {
delim = get_delimiters(string + i, delimiters);
if (delim) {
string[i] = '\0';
result[n++] = delim;
result[n++] = string + i + strlen(delim);
}
}
result[n] = NULL;
return result;
}
result :
[0] 'ls > file'
[1] '&&'
[2] 'foo '
[3] '||'
[4] ' bar'
remember result and string are malloced, so you must free the result and result[0]
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array);
int main()
{
char **split_str;
char *delimiters[] = {
"&&",
"||"
};
int rows_in_returned_array;
split_str = split("ls > file&&foo || bar && abc ||pqwe", delimiters, 2 , &rows_in_returned_array);
int i;
for (i = 0 ; i < rows_in_returned_array ; ++i)
{
printf("\n%s\n", split_str[i]);
}
return 0;
}
char **split(char *str, char **delimiters, int number_of_delimiters, int *number_of_rows_in_return_array)
{
//temporary storage for array to be returned
char temp_store[100][200];
int row = 0;//row size of array that will be returned
char **split_str;
int i, j, k, l, mark = 0;
char temp[100];
for (i = 0 ; str[i] != '\0' ; ++i)
{
//Iterating through all delimiters to check if any is str
for (j = 0 ; j < number_of_delimiters ; ++j )
{
l = i;
for (k = 0 ; delimiters[j][k] != '\0' ; ++k)
{
if (str[i] != delimiters[j][k])
{
break;
}
++l;
}
//This means delimiter is in string
if (delimiters[j][k] == '\0')
{
//store the string before delimiter
strcpy(temp_store[row], &str[mark]);
temp_store[row ++][i - mark] = '\0';
//store string after delimiter
strcpy(temp_store[row], &str[i]);
temp_store[row ++][k] = '\0';
//mark index where this delimiter ended
mark = l;
//Set i to where delimiter ends and break so that outermost loop
//can iterate from where delimiter ends
i = l - 1;
break;
}
}
}
//store the string remaining
strcpy(temp_store[row++], &str[mark]);
//Allocate the split_str and store temp_store into it
split_str = (char **)malloc(row * sizeof(char *));
for (i=0 ; i < row; i++)
{
split_str[i] = (char *)malloc(200 * sizeof(char));
strcpy(split_str[i], temp_store[i]);
}
*number_of_rows_in_return_array = row;
return split_str;
}
This should probably work. Note that I have passed int * number_of_rows_in_return_array by ref because we need to know the row size of the retuned array.
I went into abstraction. First I created a "sentence" library, that allows for manipulating NULL terminated list of strings (char*). I wrote some initial accessors (sentence_init, sentence_size, sentence_free, sentence_add_str etc.).
Then I went to split, witch becomes really, really easy then - if a delimeter is found, add the string up the delimeter to the sentence and add the delimeter to the sentence. Then increment the string pointer position. If the delimeter is not found, add the remaining string to the sentence.
There is a real problem with double pointers tho, because char ** is not implicitly convertible to const char **. For production code, I would probably aim to refactor the code, and try to take const-correctness into account.
#define _GNU_SOURCE 1
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
/*
* sentence - list of words
*/
/* ----------------------------------------------------------- */
// if this would be production code, I think I would go with a
// struct word_t { char *word; }; struct sentence_t { struct word_t *words; };
// Note: when sentence_add_* fail - they free *EVERYTHING*, so it doesn't work like realloc
// shared_ptr? Never heard of it.
char **sentence_init(void) {
return NULL;
}
size_t sentence_size(char * const *t) {
if (t == NULL) return 0;
size_t i;
for (i = 0; t[i] != NULL; ++i) {
continue;
}
return i;
}
void sentence_free(char * const *t) {
if (t == NULL) return;
for (char * const *i = t; *i != NULL; ++i) {
free(*i);
}
free((void*)t);
}
void sentence_printex(char * const *t, const char *fmt1, const char *delim, const char *end) {
for (char * const *i = t; *i != NULL; ++i) {
printf(fmt1, *i);
if (*(i + 1) != NULL) {
printf(delim);
}
}
printf(end);
}
void sentence_print(char * const *t) {
sentence_printex(t, "%s", " ", "\n");
}
void sentence_print_quote_words(char * const *t) {
sentence_printex(t, "'%s'", " ", "\n");
}
bool sentence_cmp_const(const char * const *t, const char * const *other) {
const char * const *t_i = t;
const char * const *o_i = other;
while (*t_i != NULL && o_i != NULL) {
if (strcmp(*t_i, *o_i) != 0) {
return false;
}
++t_i;
++o_i;
}
return *t_i == NULL && *o_i == NULL;
}
// thet's always funny, because "dupa" in my language means "as*"
char **sentence_add_strdupped(char **t, char *strdupped) {
const size_t n = sentence_size(t);
const size_t add = 1 + 1;
const size_t new_n = n + add;
void * const pnt = realloc(t, new_n * sizeof(char*));
if (pnt == NULL) goto REALLOC_FAIL;
// we have to have place for terminating NULL pointer
assert(new_n >= 2);
t = pnt;
t[new_n - 2] = strdupped;
t[new_n - 1] = NULL;
// ownership of str goes to t
return t;
// ownership of str stays in the caller
REALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_strlened(char **t, const char *str, size_t len) {
char *strdupped = malloc(len + 1);
if (strdupped == NULL) goto MALLOC_FAIL;
memcpy(strdupped, str, len);
strdupped[len] = '\0';
t = sentence_add_strdupped(t, strdupped);
if (t == NULL) goto SENTENCE_ADD_STRDUPPED_FAIL;
return t;
SENTENCE_ADD_STRDUPPED_FAIL:
free(strdupped);
MALLOC_FAIL:
sentence_free(t);
return NULL;
}
char **sentence_add_str(char **t, const char *str) {
const size_t str_len = strlen(str);
return sentence_add_strlened(t, str, str_len);
}
/* ----------------------------------------------------------- */
/**
* Puff. Run strstr for each of the elements inside NULL delimeters dellist.
* If any returns not NULL, return the pointer as returned by strstr
* And fill dellist_found with the pointer inside dellist (can be NULL).
* Finally! A 3 star award is mine!
*/
char *str_find_any_strings(const char *str,
const char * const *dellist,
const char * const * *dellist_found) {
assert(str != NULL);
assert(dellist != NULL);
for (const char * const *i = &dellist[0]; *i != NULL; ++i) {
const char *found = strstr(str, *i);
if (found != NULL) {
if (dellist_found != NULL) {
*dellist_found = i;
}
// __UNCONST(found)
return (char*)found;
}
}
return NULL;
}
/**
* Split the string str according to the list od delimeters dellist
* #param str
* #param dellist
* #return returns a dictionary
*/
char **split(const char *str, const char * const *dellist) {
assert(str != NULL);
assert(dellist != NULL);
char **sen = sentence_init();
while (*str != '\0') {
const char * const *del_pnt = NULL;
const char *found = str_find_any_strings(str, dellist, &del_pnt);
if (found == NULL) {
// we don't want an empty string to be the last...
if (*str != '\0') {
sen = sentence_add_str(sen, str);
if (sen == NULL) return NULL;
}
break;
}
// Puff, so a delimeter is found at &str[found - str]
const size_t idx = found - str;
sen = sentence_add_strlened(sen, str, idx);
if (sen == NULL) return NULL;
assert(del_pnt != NULL);
const char *del = *del_pnt;
assert(del != NULL);
assert(*del != '\0');
const size_t del_len = strlen(del);
sen = sentence_add_strlened(sen, del, del_len);
if (sen == NULL) return NULL;
str += idx + del_len;
}
return sen;
}
int main()
{
char **sentence = split("ls > file&&foo || bar", (const char*[]){"&&", "||", NULL});
assert(sentence != NULL);
sentence_print_quote_words(sentence);
printf("cmp = %d\n", sentence_cmp_const((void*)sentence, (const char*[]){"ls > file", "&&", "foo ", "||", " bar", NULL}));
sentence_free(sentence);
return 0;
}
The program will output:
'ls > file' '&&' 'foo ' '||' ' bar'
cmp = 1

Remove a word from a sentence (string)

I am in the stage of preparing myself for exams, and the thing that I m least proud of are my skills with strings. What I need to do is remove a word from a sentence, without using <string.h> library at all.
This is what I've got so far. It keeps showing me that certain variables are not declared, such as start and end.
#include <stdio.h>
/* Side function to count the number of letters of the word we wish to remove */
int count(char *s) {
int counter = 0;
while (*s++) {
counter++;
s--;
return counter;
}
/* Function to remove a word from a sentence */
char *remove_word(const char *s1, const char *s2) {
int counter2 = 0;
/* We must remember where the string started */
const char *toReturn = s1;
/* Trigger for removing the word */
int found = 1;
/* First we need to find the word we wish to remove [Don't want to
use string.h library for anything associated with the task */
while (*s1 != '\0') {
const char *p = s1;
const char *q = s2;
if (*p == *q)
const char *start = p;
while (*p++ == *q++) {
counter2++;
if (*q != '\0' && counter2 < count(s2))
found = 0;
else {
const char *end = q;
}
}
/* Rewriting the end of a sentence to the beginning of the found word */
if (found) {
while (*start++ = *end++)
;
}
s1++;
}
return toReturn;
}
void insert(char niz[], int size) {
char character = getchar();
if (character == '\n')
character = getchar();
int i = 0;
while (i < size - 1 && character != '\n') {
array[i] = character;
i++;
character = getchar();
}
array[i] = '\0';
}
int main() {
char stringFirst[100];
char stringSecond[20];
printf("Type your text here: [NOT MORE THAN 100 CHARACTERS]\n");
insert(stringFirst, 100);
printf("\nInsert the word you wish to remove from your text.");
insert(stringSecond, 20);
printf("\nAfter removing the word, the text looks like this now: %s", stringFirst);
return 0;
}
your code is badly formed, i strongly suggest compiling with:
gcc -ansi -Wall -pedantic -Werror -D_DEBUG -g (or similar)
start with declaring your variables at the beginning of the function block, they are known only inside the block they are declared in.
your count function is buggy, missing a closing '}' (it doesn't compile)
should be something like
size_t Strlen(const char *s)
{
size_t size = 0;
for (; *s != '\n'; ++s, ++size)
{}
return size;
}
implementing memmove is much more efficient then copy char by char
I reformatted you code for small indentation problems and indeed indentation problems indicate real issues:
There is a missing } in count. It should read:
/* Side function to count the number of letters of the word we wish to remove */
int count(char *s) {
int counter = 0;
while (*s++) {
counter++;
}
return counter;
}
or better:
/* Side function to count the number of letters of the word we wish to remove */
int count(const char *s) {
const char *s0 = s;
while (*s++) {
continue;
}
return s - s0;
}
This function counts the number of bytes in the string, an almost exact clone of strlen except for the return type int instead of size_t. Note also that you do not actually use nor need this function.
Your function insert does not handle EOF gracefully and refuses an empty line. Why not read a line with fgets() and strip the newline manually:
char *input(char buf[], size_t size) {
size_t i;
if (!fgets(buf, size, stdin))
return NULL;
for (i = 0; buf[i]; i++) {
if (buf[i] == '\n') {
buf[i] = '\0';
break;
}
}
return buf;
}
In function remove_word, you should define start and end with a larger scope, typically the outer while loop's body. Furthermore s1 should have type char *, not const char *, as the phrase will be modified in place.
You should only increment p and q if the test succeeds and you should check that p and q are not both at the end of their strings.
last but not least: you do not call remove_word in the main function.
The complete code can be simplified into this:
#include <stdio.h>
/* Function to remove a word from a sentence */
char *remove_word(char *s1, const char *s2) {
if (*s2 != '\0') {
char *dst, *src, *p;
const char *q;
dst = src = s1;
while (*src != '\0') {
for (p = src, q = s2; *q != '\0' && *p == *q; p++, q++)
continue;
if (*q == '\0') {
src = p; /* the word was found, skip it */
} else {
*dst++ = *src++; /* otherwise, copy this character */
}
}
*dst = '\0'; /* put the null terminator if the string was shortened */
}
return s1;
}
char *input(char buf[], size_t size) {
size_t i;
if (!fgets(buf, size, stdin))
return NULL;
for (i = 0; buf[i]; i++) {
if (buf[i] == '\n') {
buf[i] = '\0';
break;
}
}
return buf;
}
int main() {
char stringFirst[102];
char stringSecond[22];
printf("Type your text here, up to 100 characters:\n");
if (!input(stringFirst, sizeof stringFirst))
return 1;
printf("\nInsert the word you wish to remove from your text: ");
if (!input(stringSecond, sizeof stringSecond))
return 1;
printf("\nAfter removing the word, the text looks like this now: %s\n",
remove_word(stringFirst, stringSecond));
return 0;
}
Your start and end pointers are defined within a block which makes their scope limited within that block. So, they are not visible to other parts of your code, and if you attempt to reference them outside their scope, the compiler will complain and throw an error. You should declare them at the beginning of the function block.
That said, consider the following approach to delete a word from a string:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int delete_word(char *buf,
const char *word);
int main(void)
{
const char word_to_delete[] = "boy";
fputs("Enter string: ", stdout);
char buf[256];
fgets(buf, sizeof(buf), stdin);
if (delete_word(buf, word_to_delete))
{
printf("Word %s deleted from buf: ", word_to_delete);
puts(buf);
}
else
{
printf("Word %s not found in buf: ", word_to_delete);
puts(buf);
}
system("PAUSE");
return 0;
}
int chDelimit(int ch)
{
return
(ch == '\n' || ch == '\t') ||
(ch >= ' ' && ch <= '/') ||
(ch >= ':' && ch <= '#') ||
(ch >= '[' && ch <= '`') ||
(ch >= '{' && ch <= '~') ||
(ch == '\0');
}
char *find_pattern(char *buf,
const char *pattern)
{
size_t n = 0;
while (*buf)
{
while (buf[n] && pattern[n])
{
if (buf[n] != pattern[n])
{
break;
}
n++;
}
if (!pattern[n])
{
return buf;
}
else if (!*buf)
{
return NULL;
}
n = 0;
buf++;
}
return NULL;
}
char *find_word(char *buf,
const char *word)
{
char *ptr;
size_t wlen;
wlen = strlen(word);
ptr = find_pattern(buf, word);
if (!ptr)
{
return NULL;
}
else if (ptr == buf)
{
if (chDelimit(buf[wlen]))
{
return ptr;
}
}
else
{
if (chDelimit(ptr[-1]) &&
chDelimit(ptr[wlen]))
{
return ptr;
}
}
ptr += wlen;
ptr = find_pattern(ptr, word);
while (ptr)
{
if (chDelimit(ptr[-1]) &&
chDelimit(ptr[wlen]))
{
return ptr;
}
ptr += wlen;
ptr = find_pattern(ptr, word);
}
return NULL;
}
int delete_word(char *buf,
const char *word)
{
size_t n;
size_t wlen;
char *tmp;
char *ptr;
wlen = strlen(word);
ptr = find_word(buf, word);
if (!ptr)
{
return 0;
}
else
{
n = ptr - buf;
tmp = ptr + wlen;
}
ptr = find_word(tmp, word);
while (ptr)
{
while (tmp < ptr)
{
buf[n++] = *tmp++;
}
tmp = ptr + wlen;
ptr = find_word(tmp, word);
}
strcpy(buf + n, tmp);
return 1;
}
If you have to do it manually, just loop over the indicies of your string to find the first one that matches and than you’ll have a second loop that loops for all the others that matches and resets all and jumps to the next index of the first loop if not matched something in order to continue the searching. If I recall accuretaly, all strings in C are accesible just like arrays, you’ll have to figure it out how. Don’t afraid, those principles are easy! C is an easy langugae, thiught very long to write.
In order to remove: store the first part in an array, store the second part in an array, alloc a new space for both of them and concatinate them there.
Thanks, hit the upvote button.
Vitali
EDIT: use \0 to terminate your newly created string.

replacing spaces with %20 in c program

I have written the following program to replace spaces with %20.It works fine.
But it prints some garbage values for the pointer variable ptr though it might have been limited to 8 characters as the malloc assigns it 8 bytes of memory.
Can anyone tell me where did I go wrong here ?
Or is there any in place algorithm ?
void replaceSpaces(char *inputStr )
{
char *ptr;
int i,length, spaceCount=0;
int newLength,j;
for (length=0; *(inputStr+length)!='\0';length++ )
{
if (*(inputStr+length)==' ')
{
spaceCount++;
}
}
newLength = length + 2*spaceCount;
ptr = (char *)malloc(newLength*sizeof(char));
for ( i = length-1; i >=0; i--)
{
if (*(inputStr+i)==' ')
{
*(ptr+newLength-1)='0';
*(ptr+ newLength-2)='2';
*(ptr+newLength-3)='%';
newLength = newLength -3;
}
else
{
*(ptr+newLength-1) = *(inputStr+i);
newLength = newLength -1;
}
}
for ( i = 0; *(ptr+i)!='\0'; i++)
{
printf("%c",*(ptr+i));
}
}
Either use calloc() to allocate memory for ptr or terminate it with '\0' after allocation.
With your code, ptr never gets terminated with '\0'.
So either change
ptr = (char *)malloc(newLength*sizeof(char));
to
ptr = calloc(newLength*sizeof(char), sizeof(char));
Or add below line after allocating the ptr.
ptr[newLength] = '\0';
If you don't need the converted string, then you don't need to worry about conversion: you can just output characters directly.
void replace_spaces(const char *in) {
for (const char *p=in; *p; p++) {
if (*p == ' ') {
puts("%20");
} else {
putch(*p);
}
}
}
If you do need the converted string, then a useful pattern for this sort of code is to do the string conversion twice; once in "dry-run" mode where you do the conversion but don't write to the result, and once in "live" mode where you're actually writing. In the first pass, you calculate the needed length. This avoids duplication of logic, and makes it more obvious if you've counted the resulting length correctly. Here's some code in that style, with some test-cases.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *replace_spaces(const char *in) {
char *result = 0;
for (int write = 0; write <= 1; write++) {
int n = 0;
for (const char *from = in; ;from++) {
if (*from == ' ') {
if (write) memcpy(result + n, "%20", 3);
n += 3;
} else {
if (write) result[n] = *from;
n++;
}
if (!*from) break;
}
if (!write) result = malloc(n);
}
return result;
}
int main(int argc, char**argv) {
const char*cases[] = {"Hello world", "abc", " sp sp sp "};
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
char *rep = replace_spaces(cases[i]);
printf("'%s' -> '%s'\n", cases[i], rep);
free(rep);
}
return 0;
}
Assuming that this is going to work on fairly short strings, it is a bit silly to go through the strings twice. If, like in most cases, CPU is more valuable than memory, do:
char *dst = malloc(3*strlen(src) + 1);
char *result = dst;
while (*src) {
if (*src == ' ') {
strcpy(dst, "%20"); dst+=3;
}
else *dst++ = *src;
src++;
}
*dst = 0;
// result is result

Resources