I have a text like this:
char* str="Hi all.\nMy name is Matteo.\n\nHow are you?"
and I want to split the string by "\n\n" in to an array like this:
char* array[3];
array[0]="Hi all.\nMy name is Matteo."
array[1]="How are you?"
array[2]=NULL
I've tried the strtok function but it does not split the string correctly.
#include <stdio.h>
#include <string.h>
/*
 * Splits a text on the two-character delimiter "\n\n" into a fixed-size,
 * NULL-terminated array of strings and prints every slot.
 *
 * The pieces point into the working copy `wk`, so nothing has to be freed.
 * The last slot of `array` is always kept as the NULL sentinel.
 */
int main(void)
{
    const char *str = "Hi all.\nMy name is Matteo.\n\nHow are you?";
    const char *delim = "\n\n";
    char *array[3] = { NULL };  /* every slot starts out as NULL */
    char wk[1024];
    size_t size = sizeof array / sizeof array[0];
    size_t i = 0;

    /* Refuse input that does not fit instead of overflowing wk. */
    if (strlen(str) >= sizeof wk) {
        fprintf(stderr, "input too long\n");
        return 1;
    }
    strcpy(wk, str);

    char *ptop = wk;
    while (ptop != NULL && i + 1 < size) {
        char *pend = strstr(ptop, delim);

        array[i++] = ptop;
        if (pend != NULL) {
            *pend = '\0';               /* terminate this piece in place */
            ptop = pend + strlen(delim);
        } else {
            ptop = NULL;                /* that was the last piece */
        }
    }

    for (i = 0; i < size; ++i) {
        /* Passing NULL to %s is undefined, so print the sentinel explicitly. */
        if (array[i] != NULL)
            printf("array[%zu]=\"%s\"\n", i, array[i]);
        else
            printf("array[%zu]=NULL\n", i);
    }
    return 0;
}
The strtok() function works on a set of single character delimiters. Your goal is to split by a two character delimiter, so strtok() isn't a good fit.
You could scan your input string via a loop that used strchr to find newlines and then checked to see if the next char was also a newline.
A more generic method based on strstr function:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Splits a string on a multi-character delimiter with strstr, growing the
 * result array one slot at a time, then prints and releases every piece.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if an allocation failed.
 */
int main(void) {
    const char *str = "Hi all.\nMy name is Matteo.\n\nHow are you?\n\nThanks";
    const char *delim = "\n\n";
    const size_t delim_len = strlen(delim);
    char **result = NULL;
    size_t count = 0;
    int status = EXIT_SUCCESS;
    const char *pstr = str;

    while (pstr != NULL) {
        const char *ptr = strstr(pstr, delim);
        /* Length of the piece: up to the delimiter, or to the end of input. */
        size_t size = ptr ? (size_t)(ptr - pstr) : strlen(pstr);

        /* Grow through a temporary so the old block survives a failure. */
        char **grown = realloc(result, (count + 1) * sizeof *result);
        if (grown == NULL) {
            status = EXIT_FAILURE;
            break;
        }
        result = grown;

        /* +1 for the terminator, which strncpy would not have written. */
        result[count] = malloc(size + 1);
        if (result[count] == NULL) {
            status = EXIT_FAILURE;
            break;
        }
        memcpy(result[count], pstr, size);
        result[count][size] = '\0';
        count++;

        pstr = ptr ? ptr + delim_len : NULL;
    }

    for (size_t i = 0; i < count; i++) {
        printf("Array[%zu] : >%s<\n", i, result[i]);
    }

    for (size_t i = 0; i < count; i++) {
        free(result[i]);
    }
    free(result);
    return status;
}
Related
I want to create a c program that when the user enters some words like this: "some,words, in, c, proramming." the program save words in the string "str", then it creates Dynamically a 2D array and copies the words into the 2D array:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include <conio.h>
/*
 * Releases a table of `row` separately allocated int rows, then the table
 * of row pointers itself.
 */
void freeMememory(int**array, int row){
    int **cursor = array;
    int remaining = row;

    while (remaining-- > 0) {
        free(*cursor++);
    }
    free(array);
}
int lettersCount(char *arr){
int space=0, letters=0;
do{
if(*arr !=' '&& *arr!='\t' && *arr!=','&& *arr!='.'){
letters =letters+1;
}
++arr;
}while(*arr);
return letters;
}
/*
 * Counts the words in `arr`, where a word is a maximal run of characters
 * that are not a space, tab, newline, comma or period.
 *
 * arr: NUL-terminated string to scan.
 * Returns the number of words; 0 for an empty or separator-only string.
 */
int wordCount(char *arr){
    int words = 0;
    int in_word = 0;    /* non-zero while the scan is inside a word */

    for (int i = 0; arr[i] != '\0'; i++) {
        int is_sep = arr[i] == ' ' || arr[i] == '\t' || arr[i] == '\n'
                  || arr[i] == ',' || arr[i] == '.';

        if (is_sep) {
            in_word = 0;
        } else if (!in_word) {
            /* First character of a new word: counting here (instead of
               counting separators) handles ", " runs and a last word that
               has no trailing separator. */
            in_word = 1;
            words++;
        }
    }
    return words;
}
/*
 * Reads one line, sizes a table of rows from the word and letter counts,
 * allocates it and releases it again.
 *
 * Returns 0 on success (or when there is nothing to store), 1 if an
 * allocation fails.
 */
int main (void){
    char arr[100];

    /* The width keeps the read inside arr; arr already decays to char *. */
    if (scanf("%99[^\n]", arr) != 1) {
        return 0;
    }

    /* Both helpers return plain int counts, not pointers. */
    int row = wordCount(arr);
    int column = lettersCount(arr);
    if (row <= 0) {
        return 0;
    }

    /* The outer table holds `row` pointers; each row is allocated below. */
    int **ptr = malloc((size_t)row * sizeof *ptr);
    if (ptr == NULL) {
        return 1;
    }
    for (int i = 0; i < row; i++) {
        ptr[i] = malloc(((size_t)column + 1) * sizeof *ptr[i]);
        if (ptr[i] == NULL) {
            freeMememory(ptr, i);   /* release the rows obtained so far */
            return 1;
        }
    }
    /*
    //how should I write here to copy only words from arr to ptr?
    like this:
    arr = "some words, two,three,four."
    ptr = {
    "some", "words", "two", "", "three", "four",
    }
    */
    freeMememory(ptr, row);
    return 0;
}
So any ideas how to copy only the words from the string into the 2D array without copying (periods, spaces, cammas)?
What you might be looking for is strtok from <string.h>. I will also replace row with rows and column with columns in the following code snippet, as suggested by tadman in the comments.
/* no need to cast `malloc` */
char *ptr[rows];
for (int i = 0; i < rows; ++i) {
ptr[i] = malloc(columns);
if (!token) {
fprintf(stderr, "Error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
}
const char *delims = " \t\n,.";
/* second argument are delimiters */
strcpy(ptr[0], strtok(arr, delims));
for (int i = 1; i < rows; ++i)
strcpy(ptr[i], strtok(NULL, delims));
I would also suggest simplifying your functions. For example your wordCount function could probably be simplified to this:
/*
 * Counts the tokens strtok would produce for `str` with the separator set
 * `delims`: maximal runs of characters that are not in `delims`.
 *
 * str:    NUL-terminated input; not modified.
 * delims: NUL-terminated set of separator characters.
 * Returns the number of tokens; 0 for an empty or separator-only string.
 */
int count_words(char *str, const char *delims)
{
    int words = 0;
    int in_word = 0;    /* non-zero while the scan is inside a token */

    for (int i = 0; str[i] != '\0'; ++i) {
        if (strchr(delims, str[i])) {
            in_word = 0;
        } else if (!in_word) {
            /* count each token once, at its first character, so runs of
               separators such as ", " do not inflate the result */
            in_word = 1;
            ++words;
        }
    }
    return words;
}
The function count_words could then be called like this:
/* Separator set: space, tab, newline, comma and period. */
const char *delims = " \t\n,.";
/* `arr` is the input line; it is declared outside this fragment. */
int words = count_words(arr, delims);
First notice that your code isn't using a 2D array. It's using an array of char-pointers that each point to a char-array. It's a different thing but it can be used in much the same way.
Below is an implementation that uses strtok to split the input string. Further, it uses realloc to make the array of char-pointers grow when a new word is found. Finally it uses a sentinel (i.e. NULL) to indicate end-of-words.
The code is pretty simple but the performance is poor.
Example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Breaks `str` into words separated by any of " \t\n,." and returns them as
 * a heap-allocated, NULL-terminated array of heap-allocated strings.
 *
 * The input is not modified: strtok runs on a private copy. The process
 * exits with status 1 if `str` is NULL or any allocation fails. The caller
 * frees each word and then the array.
 */
char** split(const char* str)
{
    static const char separators[] = " \t\n,.";

    if (!str) exit(1);

    /* strtok writes into its argument, so tokenise a scratch copy */
    size_t bytes = strlen(str) + 1;
    char *scratch = malloc(bytes);
    if (!scratch) exit(1);
    memcpy(scratch, str, bytes);

    char **words = NULL;
    unsigned used = 0;

    for (char *tok = strtok(scratch, separators);
         tok != NULL;
         tok = strtok(NULL, separators))
    {
        /* one more slot in the pointer table */
        words = realloc(words, (used + 1) * sizeof *words);
        if (!words) exit(1);

        /* private copy of the word, terminator included */
        size_t tok_bytes = strlen(tok) + 1;
        words[used] = malloc(tok_bytes);
        if (!words[used]) exit(1);
        memcpy(words[used], tok, tok_bytes);
        used++;
    }

    /* trailing NULL marks the end of the list */
    words = realloc(words, (used + 1) * sizeof *words);
    if (!words) exit(1);
    words[used] = NULL;

    free(scratch);
    return words;
}
/*
 * Demonstrates split(): prints the original text, then each word with its
 * index, and finally releases the words and the array.
 */
int main(void)
{
    char* text = "some,words, in, c, programming.";
    char** words = split(text);
    int index = 0;

    printf("Original string: %s\n", text);

    /* walk the list up to its NULL sentinel */
    for (char **w = words; *w != NULL; ++w)
    {
        printf("Word[%d]: %s\n", index++, *w);
    }

    /* each word is its own allocation, as is the pointer table */
    for (char **w = words; *w != NULL; ++w)
    {
        free(*w);
    }
    free(words);
    return 0;
}
Output:
Original string: some,words, in, c, programming.
Word[0]: some
Word[1]: words
Word[2]: in
Word[3]: c
Word[4]: programming
I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
/* Reads one line from stdin into a growing heap buffer; see the definition. */
char *strmalloc(char **string);
/* Line typed by the user; owned by main and released before exit. */
char *user_input = NULL;
/* Reserved for the individual words; no size is given yet. */
char *word_array[];
/*
 * Prompts for a sentence, reads it with strmalloc and releases the buffer.
 */
int main(void) {
    printf("Enter a sentence to find out the number of words: ");
    user_input = strmalloc(&user_input);

    /* The buffer came from realloc, so it must be handed back. */
    free(user_input);
    user_input = NULL;
    return 0;
}
/*
 * Reads characters from stdin up to (not including) the next newline or
 * EOF into a heap buffer that grows to exactly the size needed.
 *
 * string: address of the caller's buffer pointer (normally pointing at
 *         NULL). It is updated as the buffer is reallocated.
 * Returns the NUL-terminated line, which is also stored in *string. If no
 * character was read, *string is returned unchanged. On allocation failure
 * the buffer is freed, *string is set to NULL and NULL is returned.
 */
char *strmalloc(char **string) {
    size_t size = 0, index = 0;
    int ch;

    while ((ch = getchar()) != '\n' && ch != EOF) {
        /* Need room for this character plus the terminator. */
        if (size < index + 2) {
            size_t wanted = index + 2;
            char *tmp = realloc(*string, wanted);
            if (!tmp) {
                free(*string);
                *string = NULL;     /* clear the caller's pointer, not the local */
                return NULL;
            }
            *string = tmp;
            size = wanted;
        }
        (*string)[index++] = (char)ch;
    }

    /* Terminate only a buffer this function sized itself. */
    if (size > 0) {
        (*string)[index] = '\0';
    }
    return *string;
}
How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// Change this to change the separator characters.
// Returns non-zero when ch is whitespace or punctuation.
static inline char isSeparator(char ch) {
    // <ctype.h> functions require a value representable as unsigned char
    unsigned char uch = (unsigned char)ch;
    return isspace(uch) || ispunct(uch);
}

// Returns a pointer to the first non-separator character at or after
// `string`, or to the terminating '\0' if there is none.
char * jumpSeparator(char *string) {
    while(string[0] && isSeparator(string[0])) string++;
    return string;
}

// Returns a pointer to the first separator character at or after `string`,
// or to the terminating '\0' if there is none.
char * findEndOfWord(char *string) {
    while (string[0] && !isSeparator(string[0])) string++;
    return string;
}

// Counts the words in `string`, a word being a maximal run of
// non-separator characters. Returns 0 for an empty or separator-only string.
int countWords(char *string) {
    int count = 0;
    char * ptr = jumpSeparator(string);

    // ptr always sits at the start of a word or at the terminator, so a
    // trailing separator no longer adds a phantom word.
    while (ptr[0]) {
        count++;
        ptr = jumpSeparator(findEndOfWord(ptr));
    }
    return count;
}
/* Prints the number of words found in a sample sentence. */
int main() {
    char * text = "This is,a function... to||count words";

    printf("%d\n", countWords(text)); //====> 7
    return 0;
}
EDIT : Reusing the same functions here is another example that allocates substrings dynamically :
/*
 * Splits a sample sentence into individually allocated words using
 * countWords, jumpSeparator and findEndOfWord, prints them and frees them.
 *
 * Returns 0 on success, 1 if the pointer table cannot be allocated.
 */
int main() {
    char * sentence = "This is,a function... to||split words";
    int count = countWords(sentence);
    if (count <= 0) return 0;

    char ** substrings = malloc((size_t)count * sizeof *substrings);
    if (!substrings) return 1;

    int stored = 0;     /* number of slots actually filled */
    char * ptr = sentence;

    /* Never write past the table, even if the scan finds more words than
       countWords reported. */
    while (stored < count && (ptr = jumpSeparator(ptr)) && ptr[0]) {
        char * start = ptr;
        ptr = findEndOfWord(ptr);
        size_t len = (size_t)(ptr - start);

        char * newString = malloc(len + 1);
        if (!newString) break;
        memcpy(newString, start, len);
        newString[len] = 0;
        substrings[stored++] = newString;
    }

    // Prints the result (only the slots that were filled)
    for(int i=0; i<stored; i++) printf("%s\n", substrings[i]);
    // Frees the allocated memory
    for(int i=0; i<stored; i++) free(substrings[i]);
    free(substrings);
    return 0;
}
Output :
This
is
a
function
to
split
words
I am trying to split a line into an array of words, but I am stuck on how to do this in C. My skills in C aren't very good, so I can't think of a way to "execute" my idea. Her is what I have so far:
/* Sketch (partly pseudo-code) of splitting `str` into words stored in `arr`. */
int beginIndex = 0;
int endIndex = 0;
int maxWords = 10;
while (1) {
/* NOTE(review): isspace is given `str` itself rather than one of its
   characters, and nothing in the loop body changes the tested value. */
while (!isspace(str)) {
endIndex++;
}
/* Pseudo-code: the substring str[beginIndex .. endIndex) is wanted here. */
char *tmp = (string from 'str' from beginIndex to endIndex)
arr[wordCnt] = tmp;
wordCnt++;
beginIndex = endIndex;
/* NOTE(review): `=` assigns maxWords to wordCnt; a comparison with `==`
   was presumably intended. */
if (wordCnt = maxWords) {
return;
}
}
In my method I receive (char *str, char *arr[10]), and str is the line that I want to split when I encounter a space. arr is the array where I want to store the words. Is there any way to copy the 'chunk' of string that I want from 'str' into my tmp variable? This is the best way that I can think of right now, perhaps it's a terrible idea. If so, I would be happy to get some documentation or tips on a better method.
You should check out the C Library function strtok. You simply feed it the string you want to break up and a string of delimiters.
Here is an example of how it works (taken from the linked site):
#include <stdio.h>
#include <string.h>
/*
 * Prints a sample string and then each of its tokens on its own line,
 * using space, comma, period and dash as separators.
 */
int main ()
{
    static const char separators[] = " ,.-";
    char str[] ="- This, a sample string.";

    printf ("Splitting string \"%s\" into tokens:\n",str);

    /* the first call takes the buffer; later calls pass NULL to continue */
    for (char *tok = strtok(str, separators);
         tok != NULL;
         tok = strtok(NULL, separators))
    {
        printf ("%s\n",tok);
    }
    return 0;
}
In your case instead of printing each string you would assign the pointer returned by strtok to the next element in your array arr.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Splits `str` on whitespace into at most 10 freshly allocated words.
 *
 * str: NUL-terminated input; it is only read.
 * arr: receives the word pointers; the caller frees each stored word.
 * Returns the number of words stored. Splitting stops early when ten words
 * have been stored or when an allocation fails.
 */
int split(char *str, char *arr[10]){
    int beginIndex = 0;
    int endIndex;
    int maxWords = 10;
    int wordCnt = 0;

    while(wordCnt < maxWords){
        /* <ctype.h> needs values representable as unsigned char */
        while(isspace((unsigned char)str[beginIndex])){
            ++beginIndex;
        }
        if(str[beginIndex] == '\0')
            break;

        endIndex = beginIndex;
        while (str[endIndex] && !isspace((unsigned char)str[endIndex])){
            ++endIndex;
        }

        size_t len = (size_t)(endIndex - beginIndex);
        /* calloc zero-fills, so the copy is already terminated */
        char *tmp = calloc(len + 1, sizeof(char));
        if (tmp == NULL)
            break;  /* keep what was stored instead of copying through NULL */
        memcpy(tmp, &str[beginIndex], len);
        arr[wordCnt++] = tmp;
        beginIndex = endIndex;
    }
    return wordCnt;
}
/* Splits a sample string, then prints and frees each word in turn. */
int main(void) {
    char *words[10];
    int total = split("1st 2nd 3rd", words);
    int k = 0;

    while (k < total) {
        puts(words[k]);
        free(words[k]);
        ++k;
    }
    return 0;
}
I'm trying to create a split function using strtok and a dynamic array.
However, I have no clue where things are going wrong: No informative error messages.
It does say segmentation fault, but I don't understand how the heap is corrupt or whatever causes that happens.
Would someone be willing to explain to me what is wrong and how to do it correctly?
Edit 11:16 CST code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **toArray(char **array, char str[], char sep[], int *count);
char** my_split(const char* str, char delim, int* size);
/*
 * Splits a sample sentence on spaces with toArray and prints the tokens.
 *
 * Returns 0 on success, 1 if the pointer array cannot be allocated.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    /* strtok writes into its input, so use a writable array rather than a
       pointer to a string literal */
    char test[] = "Hello there lol";
    /* a real int whose address is passed, not a null int pointer */
    int count = 0;
    char **array = malloc(sizeof *array * 5);
    if (array == NULL) {
        return 1;
    }

    toArray(array, test, " ", &count);
    printf("Count: %d\n", count);

    int array_i;
    for (array_i = 0; array_i < count; array_i++) {
        /* the tokens point into `test`; they were not allocated, so they
           must not be freed */
        printf("array %d: %s\n", array_i, array[array_i]);
    }
    free(array);
    return 0;
}
/*
 * Tokenises `str` in place with strtok and stores a pointer to each token
 * in `array`.
 *
 * array: caller-provided storage with room for every token.
 * str:   writable, NUL-terminated input; strtok overwrites separators.
 * sep:   set of separator characters.
 * count: receives the number of tokens stored (0 when there are none).
 * Returns `array`.
 */
char **toArray(char **array, char str[], char sep[], int *count) {
    int stored = 0;

    /* Only real tokens are stored and counted; input without any token
       used to be reported as one (NULL) entry. */
    for (char *tok = strtok(str, sep); tok != NULL; tok = strtok(NULL, sep)) {
        array[stored++] = tok;
    }
    *count = stored;
    return array;
}
Compiler messages are our friend. I simply used them to track down your issues. Try the following, and compare what's been done to what you had. Pay special attention to the declaration and usage of pointer variables... :)
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
char **toArray(char **array, char str[], char sep[], int *count);
/*
 * Splits a sample sentence on spaces with toArray, prints the tokens and
 * waits for a character on stdin before exiting.
 *
 * Returns 0 on success, 1 if the pointer array cannot be allocated.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    char test[] = "Hello there lol";
    int count = 0;
    /* Room for five token pointers. The extra "+ 1" byte is harmless but
       not required: the 5 counts char* slots, not characters. */
    char **array = malloc((sizeof(char*) * 5) +1);
    if (array == NULL) {
        return 1;
    }

    toArray(array, test, " ", &count);
    printf("Count: %d\n", count);

    int array_i;
    for (array_i = 0; array_i < count; array_i++) {
        printf("array %d: %s\n", array_i, array[array_i]);
        /* the tokens point into `test`, so there is nothing to free here */
    }
    getchar();
    free(array);
    return 0;   /* 0 reports success to the caller */
}
/*
 * Tokenises `str` in place with strtok and stores a pointer to each token
 * in `array`.
 *
 * array: caller-provided storage with room for every token.
 * str:   writable, NUL-terminated input; strtok overwrites separators.
 * sep:   set of separator characters.
 * count: receives the number of tokens stored (0 when there are none).
 * Returns `array`.
 */
char **toArray(char **array, char str[], char sep[], int *count) {
    int stored = 0;

    /* Only real tokens are stored and counted; input without any token
       used to be reported as one (NULL) entry. */
    for (char *tok = strtok(str, sep); tok != NULL; tok = strtok(NULL, sep)) {
        array[stored++] = tok;
    }
    *count = stored;
    return array;
}
[EDIT] Example Output:
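Also, the line char **array = malloc(sizeof(char*) * 5); was changed to
char **array = malloc(sizeof(char*) * 5 + 1); with the idea that "hello" is actually 5 chars plus a terminating null char, '\0'. Note, however, that this allocation holds 5 char* pointers rather than characters, so the extra byte is harmless but not actually required.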
Some rules of thumb for C string(s).
1) when using malloc or calloc, don't forget to allow room for '\0'.
`char *buf1;` //buffer needed to manipulate buf2
`char buf2[]="someString";`
`buf1 = malloc(strlen(buf2)+1);` or `buf1 = malloc(sizeof(buf2));`
(note:, no '+1'. see '4)' below. )
2) clear (initialize) new allocated variable before use. eg:
memset(buf, 0, strlen("someString")+1); //preferred, all bytes are zeroed
OR
buf[0]=0; //useful, but use with care (only first byte is zeroed.)
3) Free all dynamically allocated memory when done with it. Eg:
free(buf);
4) Using the strlen() function or the sizeof operator (both popular for use in [mc]alloc()).
Given:
char *buf1 ="Hello"; //6 characters |H|e|l|l|o|\0|
char buf2[] ="Hello"; //6 characters |H|e|l|l|o|\0|
char buf3[5]="Hello"; //5 characters |H|e|l|l|o|
char buf4[5]="Hel"; //4 characters |H|e|l|\0| |
char buf5[5]="Helloo";//should get compile error, too many initializers
Compare strlen() - sizeof() results:
strlen(buf1); //->5 (requires +1 in malloc for new variable req'd to hold "Hello\0")
sizeof(buf1); //->4 or 8, platform dependent (returns sizeof (char *), not # chars in string)
strlen(buf2); //->5 (requires +1 in malloc for new variable req'd yo hold "Hello\0")
sizeof(buf2); //->6 (counts all chars, including '\0')
strlen(buf3); //-> (error: Missing terminating NULL in string argument)
sizeof(buf3); //->5 (counts all chars, but there is no '\0' in this string - wrong!)
strlen(buf4); //->3 (counts chars, but not '\0')
sizeof(buf4); //->5 (counts ALL allocated space, including '\0')
You are passing char *test = "Hello there lol"; to your toArray(). Unfortunately, the string is not modifiable, so when you try to modify it with strtok(), you get a segmentation fault.
The simplest fix is:
char test[] = "Hello there lol";
You also have:
int *count = 0;
and you call the function with:
toArray(array, test, " ", count);
You need an integer, and to pass its address:
int count = 0;
...
toArray(array, test, " ", &count);
You were also trying to free the strings that were pointed at by the elements in array, but those were never allocated (they are parts of the string test). Don't free what was not allocated with malloc() et al.
With those fixes in place, this code works:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **toArray(char **array, char str[], char sep[], int *count);
/*
 * Splits a sample sentence on spaces with toArray and prints the tokens.
 *
 * Returns 0 on success, 1 if the pointer array cannot be allocated.
 */
int main(void)
{
    char test[] = "Hello there lol";
    int count = 0;
    char **array = malloc(sizeof *array * 5);

    /* toArray writes through `array`, so it must not be NULL */
    if (array == NULL)
    {
        fprintf(stderr, "memory allocation failed\n");
        return 1;
    }

    toArray(array, test, " ", &count);
    printf("Count: %d\n", count);
    for (int i = 0; i < count; i++)
        printf("array %d: %s\n", i, array[i]);

    /* the tokens point into `test`; only the pointer array is freed */
    free(array);
    return 0;
}
/*
 * Tokenises `str` in place with strtok and stores a pointer to each token
 * in `array`.
 *
 * array: caller-provided storage with room for every token.
 * str:   writable, NUL-terminated input; strtok overwrites separators.
 * sep:   set of separator characters.
 * count: receives the number of tokens stored (0 when there are none).
 * Returns `array`.
 */
char **toArray(char **array, char str[], char sep[], int *count)
{
    int stored = 0;

    /* Only real tokens are stored and counted; input without any token
       used to be reported as one (NULL) entry. */
    for (char *tok = strtok(str, sep); tok != NULL; tok = strtok(NULL, sep))
        array[stored++] = tok;

    *count = stored;
    return array;
}
Output:
Count: 3
array 0: Hello
array 1: there
array 2: lol
I need to use strtok to read in a first and last name and separate them. How can I store the names so that I can use them independently in two separate char arrays?
#include <stdio.h>
#include <string.h>
/* Prints each space-separated token of a sample string on its own line. */
int main ()
{
    static const char separator[] = " ";
    char str[] ="test string.";

    for (char *tok = strtok(str, separator);
         tok != NULL;
         tok = strtok(NULL, separator))
    {
        printf ("%s\n",tok);
    }
    return 0;
}
Here is my take at a reasonably simple tokenize helper that
stores results in a dynamically growing array
null-terminating the array
keeps the input string safe (strtok modifies its input, and modifying a string literal is undefined behaviour in C)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Returns a heap copy of the NUL-terminated string src, or NULL on failure. */
static char *tokenize_dup(const char *src)
{
    size_t bytes = strlen(src) + 1;
    char *copy = malloc(bytes);

    if (copy != NULL)
        memcpy(copy, src, bytes);
    return copy;
}

/*
 * Splits `input` on spaces into a heap-allocated, NULL-terminated array of
 * heap-allocated tokens. The input itself is left untouched.
 *
 * Returns the array, or NULL if `input` is NULL or an allocation fails (in
 * which case nothing is leaked). The caller frees each token and the array.
 */
char** tokenize(const char* input)
{
    if (input == NULL)
        return NULL;

    /* strtok writes into its argument, so work on a private copy */
    char* str = tokenize_dup(input);
    if (str == NULL)
        return NULL;

    size_t count = 0;
    size_t capacity = 10;
    char** result = malloc(capacity*sizeof(*result));
    if (result == NULL)
    {
        free(str);
        return NULL;
    }

    for (char* tok = strtok(str, " "); tok != NULL; tok = strtok(NULL, " "))
    {
        /* always keep one free slot for the terminating NULL */
        if (count + 1 >= capacity)
        {
            size_t bigger = capacity * 2;
            char** grown = realloc(result, bigger*sizeof(*result));
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity = bigger;
        }
        result[count] = tokenize_dup(tok);
        if (result[count] == NULL)
            goto fail;
        count++;
    }

    result[count] = NULL;
    free(str);
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    free(str);
    return NULL;
}
/* Tokenizes a sample string, printing and freeing each token in turn. */
int main ()
{
    char** tokens = tokenize("test string.");

    if (tokens)
    {
        /* the list ends at its NULL sentinel */
        for (size_t k = 0; tokens[k]; ++k)
        {
            printf("%s\n", tokens[k]);
            free(tokens[k]);
        }
    }
    free(tokens);
    return 0;
}
Here is a strtok-free reimplementation of that (uses strpbrk instead):
/* Returns a NUL-terminated heap copy of the first len bytes of src, or NULL. */
static char *tokenize_dup_n(const char *src, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy != NULL)
    {
        memcpy(copy, src, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Splits `str` on spaces without strtok and without modifying the input.
 *
 * Runs of spaces act as a single separator and leading or trailing spaces
 * produce no tokens, which matches what strtok(str, " ") yields. A NULL
 * `str` gives an empty list.
 *
 * Returns a heap-allocated, NULL-terminated array of heap-allocated tokens,
 * or NULL if an allocation fails. The caller frees each token and the array.
 */
char** tokenize(const char* str)
{
    size_t count = 0;
    size_t capacity = 10;
    char** result = malloc(capacity*sizeof(*result));
    if (result == NULL)
        return NULL;

    const char* s = str;
    while (s != NULL)
    {
        s += strspn(s, " ");            /* skip the separator run */
        if (*s == '\0')
            break;
        size_t len = strcspn(s, " ");   /* length of the token */

        /* always keep one free slot for the terminating NULL */
        if (count + 1 >= capacity)
        {
            size_t bigger = capacity * 2;
            char** grown = realloc(result, bigger*sizeof(*result));
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity = bigger;
        }

        char* tok = tokenize_dup_n(s, len);
        if (tok == NULL)
            goto fail;
        result[count++] = tok;
        s += len;
    }

    result[count] = NULL;
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    return NULL;
}
Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.
That is we transform this:
char str[] ="test string.";
Into this:
char str[] ="test\0string.";
^ ^
| |
char *s1 ----- |
char *s2 -----------
.
#include <stdio.h>
#include <string.h>
/*
 * Splits a sample "first last" string in place and prints it as
 * "last, first"; a missing second word is printed as an empty string.
 */
int main ()
{
    static const char separator[] = " ";
    char str[] ="test string.";
    char *first = strtok(str, separator);
    char *second = strtok(NULL, separator);

    printf("%s, %s\n", second ? second : "", first);
    return 0;
}
What about using strcpy:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_NAMES 2
/*
 * Copies up to MAX_NAMES space-separated tokens of a sample string into
 * their own heap buffers, then prints and frees them.
 */
int main ()
{
    char str[] ="test string.";
    char *names[MAX_NAMES] = { 0 };
    char *test;
    int i = 0;

    test = strtok (str," ");
    while (test != NULL && i < MAX_NAMES)
    {
        names[i] = malloc(strlen(test)+1);
        if (names[i] == NULL)
        {
            /* stop rather than let strcpy write through a null pointer */
            break;
        }
        strcpy(names[i++], test);
        test = strtok (NULL, " ");
    }

    for(i=0; i<MAX_NAMES; ++i)
    {
        /* slots that were never filled are still NULL and are skipped */
        if(names[i])
        {
            puts(names[i]);
            free(names[i]);
            names[i] = 0;
        }
    }
    return 0;
}
It contains much clutter to maintain a complete program and clean its resources, but the main point is to use strcpy to copy each token into its own string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Splits `str` into tokens separated by any character of `delimiter`.
 *
 * str:       NUL-terminated input; it is not modified.
 * delimiter: NUL-terminated set of separator characters.
 * len:       receives the number of tokens.
 *
 * Returns a heap-allocated, NULL-terminated array of token pointers, or
 * NULL if an allocation fails. All tokens live in one private buffer that
 * begins at array[0], so releasing the result means free(array[0]) followed
 * by free(array). With no tokens, array[0] is NULL.
 */
char** split(const char *str, const char *delimiter, size_t *len){
    *len = 0;

    /* Drop leading delimiters so the first token starts the private copy. */
    str += strspn(str, delimiter);

    /* Count the tokens without modifying anything. */
    size_t count = 0;
    for (const char *p = str; *p != '\0'; ) {
        p += strcspn(p, delimiter);     /* step over one token */
        count++;
        p += strspn(p, delimiter);      /* step over the separators */
    }

    /* count pointers plus one slot for the terminating NULL */
    char **ret = malloc((count + 1) * sizeof *ret);
    if (ret == NULL)
        return NULL;
    if (count == 0) {
        ret[0] = NULL;
        return ret;
    }

    size_t bytes = strlen(str) + 1;
    char *text = malloc(bytes);
    if (text == NULL) {
        free(ret);
        return NULL;
    }
    memcpy(text, str, bytes);

    char **slot = ret;
    for (char *tok = strtok(text, delimiter); tok != NULL;
         tok = strtok(NULL, delimiter)) {
        *slot++ = tok;
    }
    *slot = NULL;
    *len = count;
    return ret;
}
/*
 * Releases a pointer array together with the buffer its first element
 * points to. A NULL argument is ignored.
 */
void free4split(char** sa){
    if (sa == NULL)
        return;

    free(*sa);  /* first element: the text buffer */
    free(sa);   /* the pointer array itself */
}
/*
 * Splits a sample string on whitespace, commas and periods, prints each
 * word on its own line and releases the result.
 */
int main(void){
    char str[] ="test string.";
    size_t len=0;
    char **words = split(str, " \t\r\n,.", &len);

    /* iterate by the reported count */
    for (size_t k = 0; k < len; ++k){
        puts(words[k]);
    }
    free4split(words);
    return 0;
}
/* result:
test
string
*/
Copy the results from strtok to a new buffer using a function such as
/*
* Returns a copy of s in freshly allocated memory.
* Exits the process if memory allocation fails.
*/
char *xstrdup(char const *s)
{
    /* size of the copy, terminator included */
    size_t bytes = strlen(s) + 1;
    char *copy = malloc(bytes);

    if (!copy) {
        perror("memory allocation failed");
        exit(1);
    }
    memcpy(copy, s, bytes);
    return copy;
}
Don't forget to free the return values when you're done with them.
IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:
#include <string.h>
#include <stdlib.h>
/*
 * Returns a NUL-terminated heap copy of the characters in [begin, end),
 * or NULL if the allocation fails.
 */
static char *make_str(char const *begin, char const *end) {
    size_t len = end-begin;
    char *ret = malloc(len+1);
    if (ret != NULL) {
        memcpy(ret, begin, len);
        ret[len]='\0';
    }
    return ret;
}

/*
 * Splits `input` into tokens separated by any character of `delims`.
 *
 * tokens: receives up to `max` heap-allocated tokens; the caller frees them.
 * max:    capacity of `tokens`.
 * input:  NUL-terminated text; it is not modified.
 * delims: NUL-terminated set of separator characters.
 *
 * Returns the number of tokens stored. Runs of delimiters count as one
 * separator; leading and trailing delimiters produce no tokens. Splitting
 * stops early if an allocation fails.
 */
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) {
    size_t count = 0;
    char const *start = input;

    while (count < max) {
        /* strspn/strcspn stop at the terminator. The earlier strchr test
           matched '\0' as a delimiter and walked off the end of input. */
        start += strspn(start, delims);
        if (*start == '\0')
            break;
        char const *end = start + strcspn(start, delims);

        char *tok = make_str(start, end);
        if (tok == NULL)
            break;
        tokens[count++] = tok;

        /* resume at the delimiter (or terminator), never one past it */
        start = end;
    }
    return count;
}
#ifdef TEST
#define MAX_TOKENS 10
/* Tokenizes a sample sentence, printing each token between bars and freeing it. */
int main() {
    char *tokens[MAX_TOKENS];
    size_t num = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");

    /* walk the filled part of the array by pointer */
    for (char **t = tokens; t < tokens + num; ++t) {
        printf("|%s|\n", *t);
        free(*t);
    }
    return 0;
}
#endif
You can do something like this too.
/*
 * Splits a sample string at its first space and prints both parts; a
 * missing part is printed as an empty string.
 */
int main ()
{
    char str[] ="test string.";
    char * temp1;
    char * temp2;

    temp1 = strtok (str," ");
    /* strtok has already overwritten the space with '\0', so searching str
       for ' ' afterwards finds nothing. An empty delimiter set makes the
       next call return the whole remainder, or NULL if nothing is left. */
    temp2 = strtok (NULL, "");

    /* never pass NULL to %s */
    printf ("Splitted string :%s, %s\n" , temp1 ? temp1 : "" , temp2 ? temp2 : "");
    return 0;
}