I am trying to implement a split function, which receives an array of chars and a delimiter that decides which chars go into each string of the resulting array.
I have a problem with strcat and memset; can someone explain my mistakes to me?
/*
 * split (as posted in the question): meant to break str into the words
 * separated by delimiter and return them as an array of strings.
 * num_items() is not shown here; presumably it returns the number of words.
 * NOTE(review): only the array of pointers is allocated below, never the
 * strings themselves, so memset and strcat are handed pointers with no storage.
 */
char** split(const char* str, char delimiter)
{
int ch=0;
int word=0;
/* zero is not used anywhere below */
const char * zero="\0";
unsigned int size=num_items(str,delimiter);
/* get the size of split[][] */
/* calloc zero-fills the pointer array, so every split[n] starts out without storage */
char** split= calloc(size+1,sizeof(char*));
for(int i=0; i<strlen(str); i++)
{
if(ch==0)
{
/* start of a new word: split[word] has not been allocated at this point */
memset(split[word],'\0',1);
/* set the first value to '\0' */
ch++;
}
if(str[i]!=delimiter)
{
/* adding char by char to the split */
/* &str[i] is the whole remainder of str, so strcat would append more than one char */
strcat(split[word],&str[i]);
ch++;
}else{
/* delimiter: move on to the next word */
ch=0;
word++;
}
}
return split;
}
Memory needs to be allocated for the pointers and the strings they point to.
A single character can be assigned directly. There is no need for strcat, which in any case expects pointers to zero-terminated strings.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split str into the pieces separated by delimiter.
 *
 * Returns a heap-allocated array of heap-allocated, NUL-terminated strings
 * whose last element is a NULL sentinel.  Leading or adjacent delimiters
 * produce empty strings; a trailing delimiter does not add a final empty
 * string.  An empty str yields NULL.  If an allocation fails, a message is
 * written to stderr and whatever was collected so far is returned (still
 * terminated by NULL).  The caller frees every string and then the array.
 */
char** split(const char* str, char delimiter)
{
    char **words = NULL;
    size_t count = 0;              /* index of the word currently being filled */
    size_t ch = 0;                 /* characters stored in the current word */
    size_t len = strlen ( str);

    for ( size_t i = 0; i < len; i++) {
        if ( ch == 0) {
            /* first character of a word: make room for it and for the sentinel */
            char **grown = realloc ( words, sizeof *words * ( count + 2));
            if ( ! grown) {
                fprintf ( stderr, "problem malloc\n");
                return words;
            }
            words = grown;
            /* no word can be longer than the input; zero fill keeps it terminated */
            words[count] = calloc ( 1, len + 1);
            if ( ! words[count]) {
                fprintf ( stderr, "problem calloc\n");
                return words;
            }
            words[count + 1] = NULL;//sentinel
        }
        if ( str[i] != delimiter) {
            words[count][ch++] = str[i];
            continue;
        }
        /* word complete: shrink to the exact size and keep the new pointer */
        char *exact = realloc ( words[count], ch + 1);
        if ( ! exact) {
            fprintf ( stderr, "problem realloc\n");
            return words;
        }
        words[count] = exact;
        ch = 0;
        count++;
    }
    if ( words && ch > 0) {
        /* the final word has no delimiter after it; shrinking is best effort */
        char *exact = realloc ( words[count], ch + 1);
        if ( exact) {
            words[count] = exact;
        }
    }
    return words;
}
/* Demo: split a sentence on spaces, print one word per line, release everything. */
int main ( void) {
    char *text = "a bc def ghij klmno pqr st u v wzyx";
    char space = ' ';
    char **words = split ( text, space);

    if ( words) {
        /* the array ends with a NULL sentinel */
        for ( char **w = words; *w; ++w) {
            printf ( "%s\n", *w);
        }
        for ( char **w = words; *w; ++w) {
            free ( *w);
        }
    }
    free ( words);
    return 0;
}
Related
I'm coming back to you about my function char **my_str_to_word_array(char *str). The purpose is to separate the string at each non-printable ASCII character and include the above in a new row of the double dimensional array.
Non-printable ASCII characters should be used as separators and should not be included in the line.
Example:
char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive." ;
char **array = my_str_to_word_array(test) ;
array[0] = "My name is John Doe." (zero terminated string)
array[1] = "I have 0 GPA." (zero terminated string)
array[2] = "I will survive." (zero terminated string)
array[3] = NULL
I have 2 problems:
If in my test main() I have a printf() below the call to my_str_to_word_array, the format passed to printf() will be included in the array. So I conclude that there is a memory read error.
When I try to free() the array I get an error :
double free or corruption (out)
[1] 33429 IOT instruction (core dumped) ./libmy
/*
 * Counts the runs of printable characters in str (as posted in the question).
 * NOTE(review): when a run ends at the terminating 0, the unconditional i++
 * at the bottom of the loop steps past the terminator and the outer loop
 * then keeps reading beyond the string.
 */
size_t get_words_number(char const *str)
{
size_t count = 0;
const char *i = str;
while (*i != 0) {
/* a printable character here is the start of a new word */
if (isprint(*i)) {
count++;
}
/* skip the rest of that word */
while (*i != 0 && isprint(*i)) {
i++;
}
/* step over one separator; this also runs when *i is already 0 */
i++;
}
return count;
}
/*
 * Releases the first i strings stored in array and then array itself.
 * Always returns NULL so a caller can write `return free_corrupted_array(...)`.
 */
char **free_corrupted_array(char **array, size_t i)
{
    for (size_t k = 0; k < i; ++k) {
        free(array[k]);
    }
    free(array);
    return NULL;
}
/*
 * Copies word_count words from str into array and stores a NULL after them
 * (as posted in the question).  On a failed strndup the partially filled
 * array is released and NULL is returned.
 * NOTE(review): the separator-skipping loop near the bottom does not stop at
 * the terminating 0, so after the last word it runs past the end of str.
 */
char **fill_array(char **array, const char *str, size_t word_count)
{
size_t word_size = 0, j = 0;
const char *i = str;
while (j < word_count) {
/* measure the current word */
while (*i != 0 && isprint(*i)) {
word_size++;
i++;
}
/* i now points just past the word, so the word starts word_size bytes earlier */
array[j] = strndup(i - word_size, word_size);
if (!array[j]) {
return free_corrupted_array(array, j);
}
word_size = 0;
j++;
/* skip separators; 0 is not printable, so this does not stop at the terminator */
while (!isprint(*i)) {
i++;
}
}
/* writes index word_count, one more slot than the number of words */
array[j] = NULL;
return array;
}
/*
 * Builds the array of words for str (as posted in the question).
 * Returns NULL when str is NULL or when malloc fails.
 * NOTE(review): the allocation below has room for word_count pointers only,
 * while fill_array also stores a terminating NULL at index word_count.
 */
char **my_str_to_word_array(char const *str)
{
char **word_array = NULL;
size_t word_count = 0;
if (!str) {
return NULL;
}
word_count = get_words_number(str);
/* no slot is reserved here for the terminating NULL */
word_array = malloc(word_count * sizeof(char *));
if (!word_array) {
return NULL;
}
word_array = fill_array(word_array, str, word_count);
return word_array;
}
/*
 * Frees every string of a NULL-terminated word array (as posted in the question).
 * NOTE(review): the loop advances word_array itself, so the final free()
 * receives the address of the NULL terminator slot rather than the start of
 * the allocation.
 */
void my_free_word_array(char **word_array)
{
if (!word_array) {
return;
}
while (*word_array != NULL) {
free(*word_array);
word_array++;
}
/* word_array no longer points at the beginning of the array here */
free(word_array);
}
/*
 * Test driver from the question: splits a three-line string, prints the
 * words, prints the length of the original string and frees the array.
 * NOTE(review): the print loop advances word_array, so the pointer later
 * passed to my_free_word_array is no longer the one that was returned.
 */
int main(int argc, char **argv)
{
const char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive.";
char **word_array = my_str_to_word_array(test);
/* word_array is dereferenced without a NULL check */
while (*word_array != NULL) {
printf("%s\n", *word_array);
word_array++;
}
/* strlen returns size_t; %lu expects unsigned long */
printf("Test print original size %lu\n", strlen(test));
my_free_word_array(word_array);
return 0;
}
And the output :
My name is John Doe.
I have 0 GPA.
I will survive.
Test print original size %lu
Test print original size 50
double free or corruption (out)
[1] 33429 IOT instruction (core dumped) ./libmy
Do you see the problem?
Errors:
get_words_number goes out of bounds (off by one) and may read arbitrary memory after your string (check with the example I included in main).
You need an additional slot in your array to put there a terminating NULL.
Stop thrashing your input pointer if you later need it (both in my_free_word_array and in the printing loop in main).
EDITED: as Fe2O3 commented, I missed another bug in the fill_array function. You should also ensure that *i!=0 in the last loop.
Suggestions:
Next time make a Minimal, Reproducible Example by including all required headers;
strndup is not standard (unless you have __STDC_ALLOC_LIB__ and define __STDC_WANT_LIB_EXT2__ to 1).
You don't need the free_corrupted_array function at all.
EDITED: it's useless to check that *i!=0 if you are also checking that isprint(*i). 0 is not printable, so no need for the first check.
#define _CRT_SECURE_NO_WARNINGS
#ifdef __STDC_ALLOC_LIB__
#define __STDC_WANT_LIB_EXT2__ 1
#else
#include <stdlib.h>
#include <string.h>
/*
 * Fallback for platforms without strndup: returns a newly allocated,
 * NUL-terminated copy of at most size characters of str, or NULL when the
 * allocation fails.  The caller frees the result.
 */
char *strndup(const char *str, size_t size)
{
    size_t len = 0;
    char *copy;

    /* never read past the terminator of str */
    while (len < size && str[len] != '\0') {
        len++;
    }
    copy = malloc(len + 1);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, str, len);
    copy[len] = '\0';
    return copy;
}
#endif
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
/*
 * Counts the words in str, where a word is a maximal run of printable
 * characters and every non-printable character is a separator.
 * Characters are examined as unsigned char so that isprint() is never handed
 * a negative value.
 */
size_t get_words_number(char const *str)
{
    const unsigned char *p = (const unsigned char *)str;
    size_t count = 0;

    while (*p != 0) {
        if (isprint(*p)) {
            count++;
            /* the terminating 0 is not printable, so this stops at the end */
            while (isprint(*p)) {
                p++;
            }
        } else {
            p++;
        }
    }
    return count;
}
/*
 * Frees every string of a NULL-terminated word array and then the array.
 * A NULL argument is accepted and ignored.
 */
void my_free_word_array(char **word_array)
{
    if (word_array != NULL) {
        /* walk with a separate cursor so the array pointer itself stays intact */
        char **cursor = word_array;
        while (*cursor != NULL) {
            free(*cursor);
            cursor++;
        }
        free(word_array);
    }
}
/*
 * Copies the first word_count words of str into array and terminates the
 * array with NULL.  array must have room for word_count + 1 pointers.
 * A word is a run of printable characters; separators before, between and
 * after the words are skipped, so a string that starts with a separator no
 * longer produces an empty first entry.
 * Returns array, or NULL after releasing everything when a copy cannot be
 * allocated.
 */
char **fill_array(char **array, const char *str, size_t word_count)
{
    /* unsigned char keeps the isprint() argument in its defined range */
    const unsigned char *i = (const unsigned char *)str;
    size_t j = 0;

    while (j < word_count) {
        const unsigned char *word_start;

        /* skip separators in front of the word, stopping at the terminator */
        while (*i != 0 && !isprint(*i)) {
            i++;
        }
        word_start = i;
        /* 0 is not printable, so this loop cannot run past the end */
        while (isprint(*i)) {
            i++;
        }
        array[j] = strndup((const char *)word_start, (size_t)(i - word_start));
        if (!array[j]) {
            /* array[j] is NULL and therefore terminates what was stored so far */
            my_free_word_array(array);
            return NULL;
        }
        j++;
    }
    array[j] = NULL;
    return array;
}
/*
 * Splits str into a NULL-terminated array of newly allocated words.
 * Returns NULL when str is NULL or when memory cannot be obtained.
 */
char **my_str_to_word_array(char const *str)
{
    if (str == NULL) {
        return NULL;
    }

    size_t word_count = get_words_number(str);
    /* one extra slot holds the terminating NULL */
    char **word_array = malloc((word_count + 1) * sizeof(char *));

    return word_array ? fill_array(word_array, str, word_count) : NULL;
}
/*
 * Test driver: the text after the last newline is cut off first, so that
 * unrelated bytes follow the terminator of the string being split.
 */
int main(int argc, char **argv)
{
    char test[] = "My name is John Doe.\nI have 0 GPA.\nI will survive.\nThis will be removed from the string";
    char *last_newline = strrchr(test,'\n');
    *last_newline = 0;

    char **word_array = my_str_to_word_array(test);
    if (!word_array) {
        return 0;
    }
    /* iterate with a cursor so word_array still points at the allocation */
    for (char **word = word_array; *word != NULL; ++word) {
        printf("%s\n", *word);
    }
    printf("Test print original size %zu\n", strlen(test));
    my_free_word_array(word_array);
    return 0;
}
OP's code missed a check for a null character. #Costantino Grana
Candidate get_words_number() correction and simplification:
Count transitions from "non-word" to "word".
Use unsigned char* for defined use for all characters in is...() functions.
#include <ctype.h>
#include <stdbool.h>
/*
 * Counts words by counting the transitions from "not in a word" to
 * "in a word", where a word character is any printable character.
 */
size_t get_words_number(char const *str) {
    size_t count = 0;
    bool in_word = false;

    for (const unsigned char *p = (const unsigned char *) str; *p != '\0'; p++) {
        bool printable = isprint(*p) != 0;
        if (printable && !in_word) {
            count++;
        }
        in_word = printable;
    }
    return count;
}
The biggest problem with the OP code is that it has been fractured into so many helper functions that it is almost unreadable. A simple process has been muddied by this fragmentation.
Below is a "single pass" version that solves this problem. It does not involve helper functions with their parameters and maddening variable names.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/*
 * Single-pass demo: copies the text, cuts the copy into segments at every
 * non-printable character and stores a pointer to each segment in a
 * NULL-terminated array that grows by one slot per segment.
 * The segments live inside `copy`, so only `arr` and `copy` are freed.
 */
int main() {
    // OP string still as "string literal"
    const char *test = "My name is John Doe.\nI have 0 GPA.\nI will survive.";

    // mutable copy of that string
    char *copy = malloc( strlen( test ) + 1 );
    if( copy == NULL ) {
        fprintf( stderr, "out of memory\n" );
        return EXIT_FAILURE;
    }
    strcpy( copy, test );

    // get one element that is NULL
    size_t cnt = 1;
    char **arr = calloc( cnt, sizeof *arr );
    if( arr == NULL ) {
        fprintf( stderr, "out of memory\n" );
        free( copy );
        return EXIT_FAILURE;
    }

    // chop copy on separators
    char *cp = copy;
    while( *cp ) {
        // skip leading/trailing separators
        while( *cp && !isprint( (unsigned char)*cp ) ) cp++;
        if( !*cp ) break; // was 1 or more trailing seps

        // search for end of segment
        char *ep = cp;
        while( isprint( (unsigned char)*ep ) ) ep++;

        // remember if this is the final segment (int: <stdbool.h> is not included)
        int atEnd = *ep == '\0';

        // terminate segment and store pointer
        *ep = '\0';
        char **grown = realloc( arr, ( cnt + 1 ) * sizeof *arr );
        if( grown == NULL ) {
            fprintf( stderr, "out of memory\n" );
            free( arr );
            free( copy );
            return EXIT_FAILURE;
        }
        arr = grown;
        cnt++;
        arr[ cnt-2 ] = cp;
        arr[ cnt-1 ] = NULL;

        // move on (only if there is more to examine).
        cp = ep + !atEnd;
    }

    // output; the index is a size_t, hence %zu
    size_t idx = 0;
    do
        printf( "%zu: %s\n", idx, arr[ idx ] ? arr[ idx ] : "END OF ARRAY" );
    while( arr[ idx++ ] );

    // cleanup
    free( arr );
    free( copy );

    return 0;
}
The crucial verifications of return values from the heap allocation functions have been deliberately omitted to improve the clarity of this example code. Those verifications are left as an exercise for the reader.
I am trying to enhance the string splitter by splits on : char. Original version can be found at string splitter - how is it working
I do not want to use MAX_TOKEN_SIZE, I want the buffer to be just enough to hold each token. I added malloc and realloc as follows but I am getting free(): double free detected in tcache 2 error which I do not understand. How am I double freeing ? Thanks for all your help.
PS: Based on Gerhardh's comments, I modified the code as follows, but now I am getting segfault.
PS: Based on user3121023's comments, I added parenthesis around *token in 2 places and it works now.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Initial size of a token buffer and the step by which it grows. */
# define GROWBY 32

/*
 * Copies the characters of str up to (not including) the next delimiter
 * into *token and NUL-terminates it.
 *
 * *token must point to a heap buffer of at least GROWBY bytes; it is
 * enlarged with realloc as needed and *token is updated accordingly.
 * Returns a pointer just past the consumed delimiter, or to the terminating
 * 0 of str when no delimiter was left.  Aborts when memory runs out.
 */
const char* splitter(const char *str, char delimiter, char **token) {
    size_t i = 0;
    size_t buflen = GROWBY;

    while (*str) {
        char c = *(str++);
        if (c == delimiter)
            break;
        /* always keep one byte in reserve for the terminator written below */
        if (i + 1 == buflen) {
            buflen += GROWBY;
            printf("gowing buffer\n");
            char *new_token = realloc(*token, buflen * sizeof **token);
            if (new_token == NULL){
                fprintf(stderr, "Out of Memory");
                abort();
            }
            *token = new_token; //set the new pointer to old pointer
        }
        (*token)[i++] = c;
    }
    (*token)[i] = '\0'; /* i < buflen is guaranteed by the check in the loop */
    return str;
}
/*
 * Prints every ':'-separated entry of a PATH-like string, one per line;
 * an empty entry is shown as "./".
 */
int main(){
    const char *env =
        "/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";

    while (*env){
        /* splitter expects a buffer of GROWBY bytes and grows it when needed */
        char *token = malloc(GROWBY * sizeof(char));
        if (token == NULL) {
            fprintf(stderr, "Out of Memory");
            return 1;
        }
        env = splitter(env, ':', &token);
        if (token[0] == '\0') {
            strcpy(token, "./");
        }
        printf("%s\n", token) ;
        free(token);
    }
    return 0;
}
Try using strcspn to advance to the next delimiter.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Stores the part of str that precedes the first character from delimiter
 * in *token, which is (re)allocated to fit; *token may be NULL on entry.
 * An empty token gets three bytes so the caller can overwrite it with "./".
 * Returns a pointer just past the delimiter, or to the terminating 0 of str.
 * Aborts when memory runs out.
 */
const char* splitter(const char *str, char *delimiter, char **token) {
    size_t token_len = strcspn ( str, delimiter); // characters to next delimiter
    size_t alloc_len = token_len ? token_len + 1 : 3;

    char *new_token = realloc(*token, alloc_len * sizeof **token);
    if (new_token == NULL){
        fprintf(stderr, "Out of Memory");
        abort();
    }
    memcpy ( new_token, str, token_len);
    new_token[token_len] = 0;
    *token = new_token;

    const char *next = str + token_len;
    /* step over the delimiter unless the end of the string was reached */
    return *next ? next + 1 : next;
}
/*
 * Prints every ':'-separated entry of a PATH-like string, one per line;
 * an empty entry is shown as "./".
 */
int main(void){
    const char *env =
        "/bin/svsgerertegdfyufdujhdcjxbcn:/sbin:::/usr/bin/46526vw67vxshgshnxxcxhcgbxhcbxn";

    for (const char *cursor = env; *cursor; ) {
        char *token = NULL;   /* splitter allocates the buffer */
        cursor = splitter(cursor, ":", &token); // note " instead of '
        if (*token == '\0') {
            strcpy(token, "./");
        }
        printf("%s\n", token) ;
        free(token);
    }
    return 0;
}
I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
/* Reads one line from standard input into *string; defined after main. */
char *strmalloc(char **string);
/* Receives the line read in main. */
char *user_input = NULL;
/* Not referenced by the code shown here; presumably meant to hold the split words. */
char *word_array[];
/* Prompts for a sentence and reads it into the global user_input. */
int main(void) {
    const char *prompt = "Enter a sentence to find out the number of words: ";

    printf("%s", prompt);
    user_input = strmalloc(&user_input);

    return 0;
}
/*
 * Reads one line from standard input (up to a newline or end of input) into
 * a heap buffer and NUL-terminates it; the newline is not stored.
 *
 * *string is the buffer to (re)use and must be NULL or a pointer previously
 * obtained from malloc/realloc; it is treated as having no usable capacity.
 * On success *string and the return value point to the line, trimmed to its
 * exact size.  On allocation failure the buffer is freed, *string is set to
 * NULL and NULL is returned.  Returns NULL as well when string itself is NULL.
 */
char *strmalloc(char **string) {
    size_t capacity = 0;
    size_t length = 0;

    if (string == NULL) {
        return NULL;
    }

    for (;;) {
        int ch = getchar();
        int at_end = (ch == '\n' || ch == EOF);

        /* make sure the next byte (character or terminator) fits */
        if (length + 1 > capacity) {
            size_t new_capacity = capacity ? capacity * 2 : 16;
            char *tmp = realloc(*string, new_capacity);
            if (!tmp) {
                free(*string);
                *string = NULL;
                return NULL;
            }
            *string = tmp;
            capacity = new_capacity;
        }
        if (at_end) {
            (*string)[length] = '\0';
            break;
        }
        (*string)[length++] = (char)ch;
    }

    /* give back the unused tail; keeping the larger buffer is harmless */
    char *exact = realloc(*string, length + 1);
    if (exact) {
        *string = exact;
    }
    return *string;
}
How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// Change this to change the separator characters.
// Returns non-zero when ch is whitespace or punctuation.  The character is
// converted to unsigned char first because the <ctype.h> functions are only
// defined for EOF and values representable as unsigned char.
static inline char isSeparator(char ch) {
    unsigned char uc = (unsigned char)ch;
    return isspace(uc) || ispunct(uc);
}
// Returns a pointer to the first character of string that is not a
// separator, or to the terminating 0 when only separators remain.
char * jumpSeparator(char *string) {
    char *cursor = string;
    for (; *cursor != '\0'; cursor++) {
        if (!isSeparator(*cursor)) {
            break;
        }
    }
    return cursor;
}
// Returns a pointer to the first separator at or after string, or to the
// terminating 0 when the word runs to the end.
char * findEndOfWord(char *string) {
    char *cursor = string;
    for (; *cursor != '\0'; cursor++) {
        if (isSeparator(*cursor)) {
            break;
        }
    }
    return cursor;
}
// Counts the words in string, a word being a run of non-separator
// characters.  Separators at the start or at the end of the string do not
// count as words.
int countWords(char *string) {
    int count = 0;
    char *ptr = jumpSeparator(string);

    // ptr always rests on the first character of a word or on the terminator
    while (ptr[0]) {
        count++;
        ptr = jumpSeparator(findEndOfWord(ptr));
    }
    return count;
}
// Demo: counts the words of a sentence with mixed separators.
int main() {
    char * sentence = "This is,a function... to||count words";

    printf("%d\n", countWords(sentence)); //====> 7
}
EDIT : Reusing the same functions here is another example that allocates substrings dynamically :
// Demo: copies every word of a sentence into its own heap string, prints the
// words one per line and frees them again.
int main() {
    char * sentence = "This is,a function... to||split words";
    int count = countWords(sentence);

    // at least one slot, so a sentence without words does not request 0 bytes
    char ** substrings = malloc((count > 0 ? count : 1) * sizeof *substrings);
    if (!substrings) {
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    int stored = 0;
    char * ptr = sentence;
    // never store more words than there are slots
    while (stored < count && (ptr = jumpSeparator(ptr)) && ptr[0]) {
        char * start = ptr;
        ptr = findEndOfWord(ptr);
        size_t len = (size_t)(ptr - start);

        char * newString = malloc(len + 1);
        if (!newString) {
            fprintf(stderr, "out of memory\n");
            break;
        }
        memcpy(newString, start, len);
        newString[len] = 0;
        substrings[stored++] = newString;
    }

    // Prints the result (only the entries that were really stored)
    for (int i = 0; i < stored; i++) printf("%s\n", substrings[i]);

    // Frees the allocated memory
    for (int i = 0; i < stored; i++) free(substrings[i]);
    free(substrings);
    return 0;
}
Output :
This
is
a
function
to
split
words
Let's say I have this string:
char *myTestString = "Hello AND test AND test2";
I want to break this down into the set { Hello, test, test2 }, which I can finally iterate over.
Or I have
char *myTestString2 = "Hi AND there AND test AND test2";
I want to break this down into the set { Hi, there, test, test2 }, which I can later iterate over.
How do I achieve this using C?
EDIT:
Another example is splitting "Hello there AND test" should give out set { Hello there, test }.
For clarification "AND" is delimiter here.
When code does not want to alter the source string, use strcspn(s, delimiters) to measure the initial portion of s that is not made up of delimiter characters. It returns the length of that portion, i.e. the offset of the first delimiter.
Use strspn(s, delimiters) to measure the initial portion of s that is made up of delimiter characters. It returns the length of that portion, i.e. the offset of the first non-delimiter.
Note: As others mentioned, strtok() is no good for string literals, and in this case you should go with Chux's answer (strcspn), but if that's not an issue and you can work with strings stored in arrays, then continue reading. A last resort effort would be to work with a copy of the string literal.
First, you have to decide which data structure you'll use for your set (e.g. a simple linked list, where you check for duplicates before insertion).
Then, apply strtok() to your string, and if the current token is different from "AND" (or any other string to be ignored - you'd have an ignore set of strings as well), insert it in the set; otherwise, continue to the next token.
Here is a basic Complete Minimal Example to get you started:
#include <stdio.h>
#include <string.h>
#define N 3 // Max size of set
#define LEN 32 // Max length of word - 1
/*
 * Tokenizes a sentence on spaces with strtok, echoes every token and
 * collects the ones that differ from ignore_str in a fixed-size set.
 * Tokens that do not fit (more than N of them, or LEN characters or longer)
 * are reported on stderr and left out instead of overflowing the set.
 */
int main ()
{
    char set[N][LEN] = {0};
    const char* ignore_str = "AND";
    char str[] ="Hello AND test AND test2";   /* an array: strtok modifies its input */
    int i = 0;

    printf ("Splitting string \"%s\" into tokens:\n",str);
    for (char* pch = strtok (str," "); pch != NULL; pch = strtok (NULL, " "))
    {
        printf ("%s\n",pch);
        if(strcmp(pch, ignore_str) == 0)
            continue;
        if(i >= N || strlen(pch) >= LEN)
        {
            fprintf (stderr, "token \"%s\" does not fit in the set\n", pch);
            continue;
        }
        strcpy(set[i++], pch);
    }

    printf("My set is: {");
    for(int j = 0; j < i; ++j)
        printf("%s, ", set[j]);
    printf("}\n");
    return 0;
}
Output:
Splitting string "Hello AND test AND test2" into tokens:
Hello
AND
test
AND
test2
My set is: {Hello, test, test2, }
Here, I used an array to represent the set, assuming that the maximum size of the set would be 3. Of course, you could use more dynamic approach instead (e.g. dynamic memory allocated array or list).
Here you are.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits s1 into its blank/tab separated words and returns copies of all
 * words that are not equal to s2.
 *
 * Returns a heap-allocated array of heap-allocated strings terminated by a
 * null pointer, or NULL when an allocation fails (nothing is leaked in that
 * case).  A word is dropped only when it matches s2 exactly, not when it
 * merely starts with s2.  The caller frees every string and the array.
 */
char ** split( const char *s1, const char *s2 )
{
    const char *delim = " \t";
    size_t skip_len = strlen( s2 );
    size_t count = 0;                 /* words stored so far */
    char **tokens = malloc( sizeof *tokens );

    if ( tokens == NULL ) return NULL;
    tokens[0] = NULL;

    while ( *s1 )
    {
        s1 += strspn( s1, delim );
        if ( !*s1 ) break;            /* only trailing delimiters were left */

        const char *word = s1;
        size_t word_len = strcspn( s1, delim );
        s1 += word_len;

        if ( word_len == skip_len && strncmp( word, s2, skip_len ) == 0 ) continue;

        /* get both allocations before touching the array */
        char *copy = malloc( word_len + 1 );
        char **grown = copy ? realloc( tokens, ( count + 2 ) * sizeof *tokens ) : NULL;
        if ( grown == NULL )
        {
            free( copy );
            for ( size_t i = 0; i < count; i++ ) free( tokens[i] );
            free( tokens );
            return NULL;
        }
        memcpy( copy, word, word_len );
        copy[word_len] = '\0';

        tokens = grown;
        tokens[count++] = copy;
        tokens[count] = NULL;
    }

    return tokens;
}
/* Demo: prints every word of s1 that is not the separator word s2. */
int main(void)
{
    const char *s1 = "Hi AND there AND test AND test2";
    const char *s2 = "AND";

    char **tokens = split( s1, s2 );

    if ( tokens != NULL )
    {
        for ( char **p = tokens; *p != NULL; ++p )
        {
            puts( *p );
        }

        /* test each pointer before freeing it, never after */
        for ( char **p = tokens; *p != NULL; ++p )
        {
            free( *p );
        }

        free( tokens );
    }

    return 0;
}
The program output is
Hi
there
test
test2
The function returns NULL if a memory allocation was not successful. Otherwise it returns a pointer to an array of the element type char * the last element of which is null pointer.
The words in the source string are split by tabs and spaces. You can change the delimiters as you like.
After your comment to my previous solution it seems you need the following
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits s1 at every occurrence of the separator string s2.
 *
 * Blanks and tabs in front of a piece are skipped, as are separators that
 * directly follow one another; blanks at the end of a piece are kept.
 * An empty s2 is treated as "no separator", so the rest of s1 becomes one
 * piece.  Returns a heap-allocated array of heap-allocated strings
 * terminated by a null pointer, or NULL when an allocation fails (nothing is
 * leaked in that case).  The caller frees every string and the array.
 */
char ** split( const char *s1, const char *s2 )
{
    const char *delim = " \t";
    size_t len2 = strlen( s2 );
    size_t count = 0;                 /* pieces stored so far */
    char **tokens = malloc( sizeof *tokens );

    if ( tokens == NULL ) return NULL;
    tokens[0] = NULL;

    while ( *s1 )
    {
        /* skip white space and any separators found at this position */
        for ( ;; )
        {
            s1 += strspn( s1, delim );
            if ( len2 == 0 || strncmp( s1, s2, len2 ) != 0 ) break;
            s1 += len2;
        }
        if ( !*s1 ) break;

        const char *p = len2 != 0 ? strstr( s1, s2 ) : NULL;
        size_t len1 = p == NULL ? strlen( s1 ) : ( size_t )( p - s1 );

        /* get both allocations before touching the array */
        char *copy = malloc( len1 + 1 );
        char **grown = copy ? realloc( tokens, ( count + 2 ) * sizeof *tokens ) : NULL;
        if ( grown == NULL )
        {
            free( copy );
            for ( size_t i = 0; i < count; i++ ) free( tokens[i] );
            free( tokens );
            return NULL;
        }
        memcpy( copy, s1, len1 );
        copy[len1] = '\0';

        tokens = grown;
        tokens[count++] = copy;
        tokens[count] = NULL;

        s1 += p == NULL ? len1 : len1 + len2;
    }

    return tokens;
}
/* Demo: prints the pieces of s1 that are separated by the string s2. */
int main(void)
{
    const char *s1 = "Hi there AND test test2";
    const char *s2 = "AND";

    char **tokens = split( s1, s2 );

    if ( tokens != NULL )
    {
        for ( char **p = tokens; *p != NULL; ++p )
        {
            puts( *p );
        }

        /* test each pointer before freeing it, never after */
        for ( char **p = tokens; *p != NULL; ++p )
        {
            free( *p );
        }

        free( tokens );
    }

    return 0;
}
The program output is
Hi there
test test2
Maybe you also need to remove the trailing blanks of an extracted sub-string, which I hope you can do yourself. :)
strstr can be used to locate the sub-string. Check that the leading and trailing characters are space or trailing terminating zero.
As needed remove whitespace.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Prints the pieces of myTestString that are separated by the word in match.
 * An occurrence of match only counts as a separator when it is preceded by
 * white space and followed by white space or the end of the string, so the
 * "AND" inside "SANDY" is not a separator.  The string is only read, never
 * modified; each piece is printed with a precision-limited %s.
 */
int main ( void) {
char *myTestString = " AND SANDY AND Hello there AND AND test AND test2 AND test3 ";
char *match = "AND";
/* first: first non-space character of the whole string */
char *first = myTestString;
/* start: beginning of the piece currently being collected */
char *start = myTestString;
/* find: current candidate occurrence of match */
char *find = myTestString;
int len = strlen ( match);
while ( isspace ( (unsigned char)*start)) {//skip leading whitespace
++start;
++first;
}
while ( ( find = strstr ( start, match))) {
/* a match at the very beginning has nothing in front of it to print */
if ( find != first) {
//check for leading and trailing space or terminating zero
while ( ! (isspace ( (unsigned char)*(find - 1))
&& ( isspace ( (unsigned char)*(find + len)) || 0 == *(find + len)))) {
/* not a stand-alone word: look for the next occurrence */
find = strstr ( find + 1, match);
if ( ! find) {
/* no further occurrence: the piece runs to the end, minus trailing white space */
find = start + strlen ( start);
while ( isspace ( (unsigned char)*(find - 1))) {
--find;
}
break;
}
}
int span = (int)(find - start);
if ( span) {
printf ( "%.*s\n", span, start);
}
}
/* NOTE(review): if the inner loop ended via the break above, find is at or near the end of the string and this addition may point past the terminator - confirm */
start = find + strlen ( match);
while ( isspace ( (unsigned char)*start)) {//skip trailing whitespace
++start;
}
}
/* whatever follows the last separator is the final piece */
if ( *start) {
int end = strlen ( start) - 1;
while ( isspace ( (unsigned char)start[end])) {
--end;//remove trailing whitspace
}
printf ("%.*s\n", end + 1, start);
}
return 0;
}
Allocate memory to char**, allocate memory and copy each token.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Frees every token and the array; returns NULL. */
char **freetokens ( char **tokens);
/* Not defined in the code shown here; presumably prints every token. */
void showtokens ( char **tokens);
/* Appends a copy of the first size characters of text to tokens and increments *count. */
char **addtoken ( char **tokens, int *count, char *text, int size);
/*
 * Collects the pieces of myTestString that are separated by the word in
 * match into a dynamically grown token array, shows them and frees them.
 * An occurrence of match only counts as a separator when it is preceded by
 * white space and followed by white space or the end of the string.
 */
int main ( void) {
char *myTestString = " AND SANDY AND Hello there test AND AND test2 AND test3 ";
char *match = "AND";
/* first: first non-space character of the whole string */
char *first = myTestString;
/* start: beginning of the piece currently being collected */
char *start = myTestString;
/* find: current candidate occurrence of match */
char *find = myTestString;
char **tokens = NULL;
int items = 0;
int len = strlen ( match);
while ( isspace ( (unsigned char)*start)) {//skip leading whitespace
++start;
++first;
}
while ( ( find = strstr ( start, match))) {
/* a match at the very beginning has nothing in front of it to store */
if ( find != first) {
//check for leading and trailing space or terminating zero
while ( ! (isspace ( (unsigned char)*(find - 1))
&& ( isspace ( (unsigned char)*(find + len)) || 0 == *(find + len)))) {
/* not a stand-alone word: look for the next occurrence */
find = strstr ( find + 1, match);
if ( ! find) {
/* no further occurrence: the piece runs to the end of the string */
find = start + strlen ( start);
while ( isspace ( (unsigned char)*(find - 1))) {
--find;//remove trailing whitespace
}
break;
}
}
int span = (int)(find - start);
if ( span) {
tokens = addtoken ( tokens, &items, start, span);
}
}
/* NOTE(review): if the inner loop ended via the break above, find is at or near the end of the string and this addition may point past the terminator - confirm */
start = find + strlen ( match);
while ( isspace ( (unsigned char)*start)) {//skip trailing whitespace
++start;
}
}
/* whatever follows the last separator is the final piece */
if ( *start) {
int end = strlen ( start);
while ( isspace ( (unsigned char)start[end - 1])) {
--end;
}
tokens = addtoken ( tokens, &items, start, end);
}
showtokens ( tokens);
tokens = freetokens ( tokens);
return 0;
}
/*
 * Appends a NUL-terminated copy of the first size characters of text to the
 * NULL-terminated array tokens (which may be NULL) and increments *count.
 * Returns the possibly moved array.  When the array cannot be enlarged the
 * old array is returned unchanged; when the copy cannot be allocated the
 * enlarged array is returned without the new entry.  Both failures are
 * reported on stderr and leave *count untouched.
 */
char **addtoken ( char **tokens, int *count, char *text, int size) {
    int slot = *count;

    /* room for the new entry plus the sentinel */
    char **grown = realloc ( tokens, sizeof *tokens * ( slot + 2));
    if ( grown == NULL) {
        fprintf ( stderr, "problem realloc tokens\n");
        return tokens;
    }
    grown[slot + 1] = NULL;//sentinel

    char *copy = malloc ( size + 1);
    grown[slot] = copy;
    if ( copy == NULL) {
        fprintf ( stderr, "problem realloc tokens[]\n");
        return grown;
    }
    memmove ( copy, text, size);
    copy[size] = 0;//terminate

    *count = slot + 1;
    return grown;
}
/*
 * Frees every string of the NULL-terminated array tokens and the array
 * itself; tokens may be NULL.  Always returns NULL so the caller can reset
 * its pointer in the same statement.
 */
char **freetokens ( char **tokens) {
    if ( tokens) {
        for ( char **entry = tokens; *entry; ++entry) {
            free ( *entry);
        }
    }
    free ( tokens);
    return NULL;
}
strstr() is the tool you are looking for. It can locate a string inside another string.
Here is a simple solution with these extra specifications:
the return value is an array of (n+1) entries, the last one being a null pointer.
the separator string can appear anywhere, including inside a word.
substrings are trimmed: initial and trailing white space is removed
substrings are allocated with strndup() which is standardized in POSIX.
the separator string must have a length of at least 1
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns a newly allocated copy of the first n characters of s with
 * leading and trailing white space removed (the result of strndup).
 */
char *strdup_trim(const char *s, size_t n) {
    const char *first = s;
    const char *last = s + n;      /* one past the last character considered */

    while (first < last && isspace((unsigned char)*first)) {
        first++;
    }
    while (last > first && isspace((unsigned char)last[-1])) {
        last--;
    }
    return strndup(first, (size_t)(last - first));
}
/*
 * Splits str at every occurrence of sep and returns the trimmed pieces.
 *
 * The result is a heap-allocated array of n + 1 heap-allocated strings
 * followed by a null pointer, n being the number of separators found.
 * Returns NULL when sep is empty or when any allocation fails; in the latter
 * case everything allocated so far is released.  Free with free_split().
 */
char **split(const char *str, const char *sep) {
    size_t sep_len = strlen(sep);
    if (sep_len == 0)
        return NULL;

    /* count the separators to size the array: pieces + terminating NULL */
    size_t n = 0;
    for (const char *p = str; (p = strstr(p, sep)) != NULL; p += sep_len)
        n++;

    char **a = malloc(sizeof(*a) * (n + 2));
    if (a == NULL)
        return NULL;

    size_t i = 0;
    const char *p0 = str;          /* start of the current piece */
    for (;;) {
        const char *p = strstr(p0, sep);
        size_t len = p ? (size_t)(p - p0) : strlen(p0);

        a[i] = strdup_trim(p0, len);
        if (a[i] == NULL) {
            /* a missing copy would look like the end of the list: give up */
            while (i > 0)
                free(a[--i]);
            free(a);
            return NULL;
        }
        i++;
        if (p == NULL)
            break;
        p0 = p + sep_len;
    }
    a[i] = NULL;
    return a;
}
/* Frees every string of a NULL-terminated array and the array; NULL is ignored. */
void free_split(char **a) {
    if (a == NULL)
        return;

    for (char **entry = a; *entry != NULL; entry++)
        free(*entry);
    free(a);
}
/*
 * Splits str at sep and prints the pieces as a brace-enclosed, comma
 * separated list; prints NULL instead when split() returns no array.
 */
void test(const char *str, const char *sep) {
    char **a = split(str, sep);

    printf("split('%s', '%s') -> ", str, sep);
    if (a == NULL) {
        printf("NULL\n");
        return;
    }
    printf("{");
    for (size_t i = 0; a[i]; i++)
        printf("%s '%s'", &","[!i], a[i]);   /* "" before the first item, "," before the others */
    printf(" }\n");
    free_split(a);
}
/* Runs the three examples from the question through test(). */
int main() {
    static const char *const inputs[] = {
        "Hello AND test AND test2",
        "Hi AND there AND test AND test2",
        "Hello there AND test",
    };

    for (size_t k = 0; k < sizeof inputs / sizeof inputs[0]; k++)
        test(inputs[k], "AND");
    return 0;
}
Output:
split('Hello AND test AND test2', 'AND') -> { 'Hello', 'test', 'test2' }
split('Hi AND there AND test AND test2', 'AND') -> { 'Hi', 'there', 'test', 'test2' }
split('Hello there AND test', 'AND') -> { 'Hello there', 'test' }
This is the code. It is supposed to read text from the user and then separate that text to paragraphs, sentences and words. I am stuck on the paragraphs phase as I seem to be able to get the code working only if I input text once. The second time I try to input text I get a segmentation faulted (core dumped) error. I think it has something to do with the realloc in the function but I am not sure.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads one line from stdin and is meant to append it to the paragraph
 * array *p, whose element count is kept in *n_p (as posted in the question).
 * s, w, n_s and n_w are not used yet.
 * NOTE(review): the realloc works on **p, which is the first string of the
 * array rather than the array itself, and *p[k] is parsed as *(p[k]).
 */
void read_par(char ***p, char ***s, char ***w, int *n_p, int *n_s, int *n_w)
{
/* temp is never freed in this function */
char *temp = malloc(10001 * sizeof(char));
fgets(temp, 10001, stdin);
//paragraphs
*n_p += 1;
/* **p is a char*, i.e. one element, not the array of paragraphs */
**p = realloc(**p, *n_p * sizeof(char *));
/* [] binds tighter than *, so this indexes p, not *p */
*p[*n_p - 1] = malloc((strlen(temp) + 1) * sizeof(char));
strcpy(*p[*n_p - 1], temp);
}
/*
 * Reads two-letter options from stdin until "qt" is entered; "ap" reads one
 * more paragraph through read_par.  All paragraphs are printed at the end.
 * The first argument of read_par is &paragraphs (the token had been damaged
 * by a character-encoding mishap); the rest is unchanged from the question.
 */
int main()
{
    char option[4], **paragraphs = malloc(sizeof(char *)), **sentences = malloc(sizeof(char *)), **words = malloc(sizeof(char *));
    int num_par = 0, num_sent = 0, num_words = 0, i;

    fgets(option, 4, stdin);
    option[strcspn(option, "\n")] = '\0';
    while(strcmp(option, "qt"))
    {
        if(!(strcmp(option, "ap")))
            read_par(&paragraphs, &sentences, &words, &num_par, &num_sent, &num_words);
        fgets(option, 4, stdin);
        option[strcspn(option, "\n")] = '\0';
    }
    for(i = 0;i < num_par;i++)
        printf("%s", paragraphs[i]);
    return 0;
}
Check the return of malloc and realloc in case they fail.
The call to realloc needs one de-reference *p. It is a good practice to assign realloc to a temporary pointer in case of failure. The original pointer is still valid.
In the function, using char line[10001] avoids malloc and free. The absence of free in the original code leaks memory.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Prompts for one line on stdin and appends a heap copy of it (newline
 * included) to the paragraph array *p, whose element count is *n_p.
 *
 * *p may be NULL when *n_p is 0.  On success *p points to the enlarged array
 * and *n_p is incremented.  At end of input, on a read error or when memory
 * runs out nothing is appended and *p and *n_p keep their values.
 * s, w, n_s and n_w are reserved for sentences and words and are not used.
 */
void read_par(char ***p, char ***s, char ***w, int *n_p, int *n_s, int *n_w)
{
    char line[10001] = "";

    (void)s;
    (void)w;
    (void)n_s;
    (void)n_w;

    printf ( "\tenter line\n");
    if ( NULL == fgets ( line, sizeof line, stdin)) {
        return;//end of input: there is no paragraph to store
    }

    //paragraphs
    char *copy = malloc ( strlen ( line) + 1);
    if ( NULL == copy) {
        fprintf ( stderr, "malloc problem\n");
        return;
    }
    //tmp pointer for realloc, so *p stays valid if it fails
    char **tmp = realloc ( *p, ( (size_t)*n_p + 1) * sizeof **p);
    if ( NULL == tmp) {
        fprintf ( stderr, "realloc problem\n");
        free ( copy);
        return;
    }
    strcpy ( copy, line);
    tmp[*n_p] = copy;
    *p = tmp;//assign tmp pointer to *p
    *n_p += 1;
}
/*
 * Reads two-letter options from stdin until "qt" is entered or the input
 * ends; "ap" reads one more paragraph through read_par.  All paragraphs are
 * printed and then freed.
 */
int main( void)
{
    char option[4] = "", **paragraphs = NULL, **sentences = NULL, **words = NULL;
    int num_par = 0, num_sent = 0, num_words = 0, i;

    do {
        printf ( "\nenter option:");
        fflush ( stdout);
        if ( NULL == fgets ( option, sizeof option, stdin)) {
            break;//end of input: stop instead of repeating the last option forever
        }
        option[strcspn ( option, "\n")] = '\0';
        if( ! ( strcmp ( option, "ap")))
            read_par ( &paragraphs, &sentences, &words, &num_par, &num_sent, &num_words);
    } while ( strcmp ( option, "qt"));

    for( i = 0; i < num_par; i++)
        printf ( "%s", paragraphs[i]);
    for ( i = 0; i < num_par; i++)
        free ( paragraphs[i]);
    free(paragraphs);
    return 0;
}
Adding a string can be done with something like this:
/*
 * Growable table of strings: a count followed by a flexible array member
 * holding that many char pointers.
 */
typedef struct
{
/* number of entries currently stored in strings */
size_t nstrings;
/* flexible array member; its storage is whatever was allocated beyond the struct */
char *strings[];
}myStringTable_t;
/*
 * Allocates an empty, zero-filled string table (nstrings == 0).
 * Returns NULL when the allocation fails.
 */
myStringTable_t *init(void)
{
    myStringTable_t *table = calloc(1, sizeof *table);
    return table;
}
/*
 * Appends a heap copy of str to the table *mst.
 *
 * The table is enlarged with realloc, so *mst may change.  Returns the
 * stored copy, or NULL when mst, *mst or str is NULL or when memory runs
 * out; in those cases the table is left untouched.
 */
char *addString(const char *str, myStringTable_t **mst)
{
    if(!mst || !*mst || !str)
    {
        return NULL;
    }

    char *copy = malloc(strlen(str) + 1);
    if(!copy)
    {
        return NULL;
    }

    /* size of the struct itself (not of a pointer to it) plus one more slot */
    size_t new_count = (*mst) -> nstrings + 1;
    myStringTable_t *grown = realloc(*mst, sizeof(**mst) + new_count * sizeof((*mst) -> strings[0]));
    if(!grown)
    {
        free(copy);
        return NULL;
    }

    strcpy(copy, str);
    grown -> strings[grown -> nstrings++] = copy;
    *mst = grown;
    return copy;
}