Split line into array of words + C - c

I am trying to split a line into an array of words, but I am stuck on how to do this in C. My skills in C aren't very good, so I can't think of a way to "execute" my idea. Here is what I have so far:
/* Asker's sketch (partly pseudo-code): walk `str`, cut out each word and store it in `arr`. */
int beginIndex = 0;
int endIndex = 0;
int maxWords = 10;
while (1) {
/* NOTE(review): the whole pointer `str` is passed here; presumably str[endIndex] was meant. */
while (!isspace(str)) {
endIndex++;
}
/* Pseudo-code placeholder: this substring copy is what the question asks how to write. */
char *tmp = (string from 'str' from beginIndex to endIndex)
arr[wordCnt] = tmp;
wordCnt++;
beginIndex = endIndex;
/* NOTE(review): `=` assigns maxWords to wordCnt; a comparison would be `==`. */
if (wordCnt = maxWords) {
return;
}
}
In my method I receive (char *str, char *arr[10]), and str is the line that I want to split when I encounter a space. arr is the array where I want to store the words. Is there any way to copy the 'chunk' of string that I want from 'str' into my tmp variable? This is the best way that I can think of right now, perhaps it's a terrible idea. If so, I would be happy to get some documentation or tips on a better method.

You should check out the C Library function strtok. You simply feed it the string you want to break up and a string of delimiters.
Here is an example of how it works (taken from the linked site):
#include <stdio.h>
#include <string.h>
/* Demonstrates strtok(): prints every token of a sample sentence on its own line. */
int main ()
{
    static const char separators[] = " ,.-";
    /* strtok() writes NUL bytes into its input, so the text must be a modifiable array. */
    char text[] = "- This, a sample string.";
    char *word;

    printf("Splitting string \"%s\" into tokens:\n", text);
    for (word = strtok(text, separators); word != NULL; word = strtok(NULL, separators)) {
        printf("%s\n", word);
    }
    return 0;
}
In your case instead of printing each string you would assign the pointer returned by strtok to the next element in your array arr.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Splits str on whitespace and stores a freshly allocated, NUL-terminated copy
 * of each word in arr.
 *
 * At most 10 words are stored (the capacity of arr). Returns the number of
 * words stored; the caller owns every arr[i] and must free() it. If an
 * allocation fails, splitting stops and the words collected so far are
 * returned. str itself is never modified.
 */
int split(char *str, char *arr[10]){
    const int maxWords = 10;
    int wordCnt = 0;
    size_t beginIndex = 0;

    while (wordCnt < maxWords) {
        /* <ctype.h> functions need an unsigned char value; plain char may be negative. */
        while (isspace((unsigned char)str[beginIndex])) {
            ++beginIndex;
        }
        if (str[beginIndex] == '\0') {
            break;
        }

        size_t endIndex = beginIndex;
        while (str[endIndex] != '\0' && !isspace((unsigned char)str[endIndex])) {
            ++endIndex;
        }

        const size_t len = endIndex - beginIndex;
        /* calloc zero-fills, so the copy is already terminated. */
        char *word = calloc(len + 1, sizeof *word);
        if (word == NULL) {
            break;
        }
        memcpy(word, &str[beginIndex], len);
        arr[wordCnt++] = word;
        beginIndex = endIndex;
    }
    return wordCnt;
}
/* Splits a sample sentence and prints each word on its own line, releasing it afterwards. */
int main(void) {
    char *words[10];
    int remaining = split("1st 2nd 3rd", words);
    char **cursor = words;

    while (remaining > 0) {
        puts(*cursor);
        free(*cursor);
        ++cursor;
        --remaining;
    }
    return 0;
}

Related

How to split with multiple delimiters in C

I have this line of text:
32+-#3#2-#3#3
I need to separate numbers from each other. So basically the result would be like this:
3
2+-
3
2-
3
3
This is my code but it's not working properly because I have numbers with two digits:
#include <stdio.h>
#include <string.h>
/* Prints every '#'-separated piece of the sample string, one per line. */
int main(void) {
    char string[50] = "32-#3#2-#3#3";
    char *token;

    /* The first strtok call gets the buffer; the following ones continue with NULL. */
    for (token = strtok(string, "#"); token != NULL; token = strtok(NULL, "#")) {
        printf(" %s\n", token);
    }
    return 0;
}
You can't do it with strtok (alone), because there is no delimiter between the numbers you want to split. It's easier without strtok, just print what you want printed and add a separator unless a character which belongs to the token follows:
#include <stdio.h>
/*
 * Prints every digit or sign of the sample string and ends the output line
 * unless the following character is another sign.
 */
int main()
{
    const char input[] = "32+-#3#2-#3#3";
    size_t pos;

    for (pos = 0; input[pos] != '\0'; ++pos) {
        const char here = input[pos];
        const char next = input[pos + 1];
        const int is_digit = (here >= '0' && here <= '9');
        const int is_sign = (here == '+' || here == '-');

        if (!is_digit && !is_sign) {
            continue;               /* separators such as '#' are dropped */
        }
        putchar(here);
        if (!(next == '+' || next == '-')) {
            putchar('\n');
        }
    }
}
If you consider this too easy, you can use a regular expression to match the tokens:
#include <stdio.h>
#include <regex.h>
/*
 * Prints each "digit followed by any number of signs" token of the sample
 * string on its own line, using a POSIX basic regular expression.
 * Returns 1 if the pattern cannot be compiled, 0 otherwise.
 */
int main()
{
    const char *string = "32+-#3#2-#3#3";
    regex_t reg;
    regmatch_t match = {0};

    if (regcomp(&reg, "[0-9][+-]*", 0) != 0) {
        fputs("regcomp failed\n", stderr);
        return 1;
    }
    /* Each round resumes right after the previous match (rm_eo is 0 the first time). */
    while (regexec(&reg, string += match.rm_eo, 1, &match, 0) == 0)
        printf("%.*s\n", (int)(match.rm_eo - match.rm_so), string + match.rm_so);
    regfree(&reg);
    return 0;
}
There is a simple way to achieve this, but in C is a bit more complicated since we don't have vector as in C++ but I can suggest a pure C implementation which can be improved:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits src at every occurrence of the string pattern.
 *
 * Each piece is copied into freshly allocated, NUL-terminated memory and
 * stored in consecutive slots of outvec; *outsize is incremented once per
 * stored piece, so the caller must initialise it (normally to 0) and must
 * supply enough slots, because no bound is checked here. The caller frees
 * each piece. An empty pattern is treated as "no separator", giving a single
 * piece. If an allocation fails, splitting stops with the pieces stored so far.
 */
void split_ss(const char* src,const char* pattern, char** outvec, size_t* outsize)
{
    const size_t pat_len = strlen(pattern);
    const char *piece = src;

    for (;;) {
        /* strstr() with an empty pattern matches everywhere and would never advance. */
        const char *hit = (pat_len > 0) ? strstr(piece, pattern) : NULL;
        const size_t size = hit ? (size_t)(hit - piece) : strlen(piece);

        char *copy = malloc(size + 1);   /* +1 for the terminator */
        if (copy == NULL) {
            return;
        }
        memcpy(copy, piece, size);
        copy[size] = '\0';

        *outvec++ = copy;
        (*outsize) += 1;

        if (hit == NULL) {
            return;                      /* that was the last piece */
        }
        piece = hit + pat_len;
    }
}
/* Splits the sample string at every '#', prints each piece in brackets and frees it. */
int main()
{
    char* outdata[64] = {0};
    /* split_ss only increments the counter, so it has to start at zero. */
    size_t size = 0;
    size_t i;

    split_ss("32+-#3#2-#3#3", "#", outdata, &size);
    for (i = 0; i < size; i++) {
        printf("[%s]\r\n", outdata[i]);
        free(outdata[i]);
        outdata[i] = NULL;
    }
    return 0;
}
strstr is used so that the input can be split on a string rather than on a single character. The output is a poor man's 2D array (an array of pointers) together with a size that is used to iterate over it; don't forget to free it.
strtok() is not the right tool for your purpose... As a matter of fact, strtok() is rarely the right tool for any purpose because of its tricky semantics and side effects.
A simple loop will do:
#include <stdio.h>
/*
 * Prints every non-'#' character of the sample string together with the signs
 * that directly follow it, one group per line.
 */
int main(void) {
    char string[50] = "32+-#3#2-#3#3";
    size_t at = 0;

    while (string[at] != '\0') {
        if (string[at] != '#') {
            putchar(string[at]);
            /* Signs that follow belong to the character just printed. */
            while (string[at + 1] == '+' || string[at + 1] == '-') {
                ++at;
                putchar(string[at]);
            }
            putchar('\n');
        }
        ++at;
    }
    return 0;
}

How to copy a string to a 2D array in c

I want to create a C program that, when the user enters some words like this: "some,words, in, c, programming.", saves the words in the string "str", then dynamically creates a 2D array and copies the words into the 2D array:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include <conio.h>
/*
 * Releases a table built as an array of `row` separately allocated rows:
 * first every row, then the array of row pointers itself.
 */
void freeMememory(int**array, int row){
    int **line = array;
    int left = row;

    while (left > 0) {
        free(*line);
        ++line;
        --left;
    }
    free(array);
}
/*
 * Returns how many characters of arr are neither a space, a tab, a comma nor
 * a period. An empty string yields 0.
 */
int lettersCount(char *arr){
    int letters = 0;

    /* Test the terminator before looking at a character so "" is never overrun. */
    for (const char *p = arr; *p != '\0'; ++p) {
        if (*p != ' ' && *p != '\t' && *p != ',' && *p != '.') {
            ++letters;
        }
    }
    return letters;
}
/*
 * Returns the number of words in arr, where a word is a maximal run of
 * characters other than space, tab, newline, comma and period.
 */
int wordCount(char *arr){
    int words = 0;
    int inWord = 0;   /* non-zero while the scan is inside a word */

    for (int i = 0; arr[i] != '\0'; i++) {
        const char c = arr[i];
        const int isDelimiter =
            (c == ' ' || c == '\t' || c == '\n' || c == ',' || c == '.');

        if (isDelimiter) {
            inWord = 0;
        } else if (!inWord) {
            /* First character of a new word. */
            inWord = 1;
            words++;
        }
    }
    return words;
}
/*
 * Reads one line, sizes a table with one row per word (wordCount) and
 * lettersCount columns per row, then releases it again.
 * Returns 1 when no input could be read or an allocation fails.
 */
int main (){
    char arr[100];
    int i, row, column;

    /* Bounded read: at most 99 characters, stopping at the newline. */
    if (scanf("%99[^\n]", arr) != 1) {
        return 1;
    }
    row = wordCount(arr);
    column = lettersCount(arr);
    if (row <= 0 || column <= 0) {
        return 0;                       /* nothing to store */
    }

    /* The outer array holds one pointer per row, not row*column ints. */
    int **ptr = malloc((size_t)row * sizeof *ptr);
    if (ptr == NULL) {
        return 1;
    }
    for (i = 0; i < row; i++) {
        ptr[i] = malloc((size_t)column * sizeof *ptr[i]);
        if (ptr[i] == NULL) {
            freeMememory(ptr, i);       /* releases the rows obtained so far */
            return 1;
        }
    }
    /*
    //how should I write here to copy only words from arr to ptr?
    like this:
    arr = "some words, two,three,four."
    ptr = {
    "some", "words", "two", "", "three", "four",
    }
    */
    freeMememory(ptr, row);
    return 0;
}
So, any ideas on how to copy only the words from the string into the 2D array without copying the punctuation (periods, spaces, commas)?
What you might be looking for is strtok from <string.h>. I will also replace row with rows and column with columns in the following code snippet, as suggested by tadman in the comments.
/* no need to cast `malloc` */
char *ptr[rows];
for (int i = 0; i < rows; ++i) {
ptr[i] = malloc(columns);
if (!token) {
fprintf(stderr, "Error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
}
const char *delims = " \t\n,.";
/* second argument are delimiters */
strcpy(ptr[0], strtok(arr, delims));
for (int i = 1; i < rows; ++i)
strcpy(ptr[i], strtok(NULL, delims));
I would also suggest simplifying your functions. For example your wordCount function could probably be simplified to this:
/*
 * Returns the number of words in str, where a word is a maximal run of
 * characters that do not occur in delims. This is the same notion of a token
 * that strtok(str, delims) uses, so the result can size an array that is then
 * filled by a strtok loop.
 */
int count_words(char *str, const char *delims)
{
    int words = 0;
    int in_word = 0;   /* non-zero while the scan is inside a word */

    for (int i = 0; str[i] != '\0'; ++i) {
        if (strchr(delims, str[i])) {
            in_word = 0;
        } else if (!in_word) {
            in_word = 1;
            ++words;
        }
    }
    return words;
}
The function count_words could then be called like this:
/* Delimiters: space, tab, newline, comma and period. */
const char *delims = " \t\n,.";
/* words receives the value count_words returns for arr with that delimiter set. */
int words = count_words(arr, delims);
First notice that your code isn't using a 2D array. It's using an array of char-pointers that each point to a char-array. It's a different thing but it can be used in much the same way.
Below is an implementation that uses strtok to split the input string. Further, it uses realloc to make the array of char-pointers grow when a new word is found. Finally it uses a sentinel (i.e. NULL) to indicate end-of-words.
The code is pretty simple but the performance is poor.
Example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Splits str at spaces, tabs, newlines, commas and periods.
 *
 * Returns a heap-allocated array of heap-allocated words, terminated by a
 * NULL sentinel. The caller frees every word and then the array. The input
 * is left untouched because the tokenising is done on a private copy.
 * A NULL input or any allocation failure terminates the program via exit(1).
 */
char** split(const char* str)
{
    static const char separators[] = " \t\n,.";

    if (str == NULL) exit(1);

    /* strtok writes into its argument, so work on a scratch copy. */
    const size_t bytes = strlen(str) + 1;
    char *scratch = malloc(bytes);
    if (scratch == NULL) exit(1);
    memcpy(scratch, str, bytes);

    char **words = NULL;
    unsigned count = 0;

    for (char *tok = strtok(scratch, separators);
         tok != NULL;
         tok = strtok(NULL, separators)) {
        /* Grow the pointer table by one slot for this word. */
        words = realloc(words, (count + 1) * sizeof *words);
        if (words == NULL) exit(1);

        const size_t tok_bytes = strlen(tok) + 1;
        words[count] = malloc(tok_bytes);
        if (words[count] == NULL) exit(1);
        memcpy(words[count], tok, tok_bytes);
        ++count;
    }

    /* One more slot for the NULL that marks the end of the words. */
    words = realloc(words, (count + 1) * sizeof *words);
    if (words == NULL) exit(1);
    words[count] = NULL;

    free(scratch);
    return words;
}
/* Splits a sample sentence, lists the words with their index and frees everything. */
int main(void)
{
    char *str = "some,words, in, c, programming.";
    char **arr = split(str);
    int shown = 0;

    printf("Original string: %s\n", str);
    while (arr[shown] != NULL) {
        printf("Word[%d]: %s\n", shown, arr[shown]);
        ++shown;
    }

    /* Release every word up to the NULL sentinel, then the pointer table itself. */
    for (char **word = arr; *word != NULL; ++word) {
        free(*word);
    }
    free(arr);
    return 0;
}
Output:
Original string: some,words, in, c, programming.
Word[0]: some
Word[1]: words
Word[2]: in
Word[3]: c
Word[4]: programming

Dynamic memory allocation for an array of pointers to char in C

I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far:
#include <stdio.h>
#include <stdlib.h>
char *strmalloc(char **string);
char *user_input = NULL;
char *word_array[];
/* Prompts for a sentence, reads it with strmalloc and releases the buffer again. */
int main(void) {
    printf("Enter a sentence to find out the number of words: ");
    /* The prompt has no newline, so flush it before blocking on input. */
    fflush(stdout);
    user_input = strmalloc(&user_input);
    free(user_input);
    user_input = NULL;
    return 0;
}
/*
 * Reads one line from standard input into a growing heap buffer.
 *
 * *string must be NULL or a pointer previously obtained from malloc/realloc;
 * it is resized as needed and updated in place. Reading stops at '\n' or EOF,
 * neither of which is stored. The result is always NUL-terminated, so an
 * empty line yields "".
 *
 * Returns the buffer (also stored in *string). On allocation failure the
 * buffer is freed, *string is set to NULL and NULL is returned.
 */
char *strmalloc(char **string) {
    size_t size = 0, index = 0;
    int ch;

    for (;;) {
        ch = getchar();
        const int at_end = (ch == '\n' || ch == EOF);

        /* Make room for this character, or for the terminator at the end. */
        if (index + 1 > size) {
            const size_t grown = size ? size * 2 : 16;   /* doubling keeps realloc calls rare */
            char *tmp = realloc(*string, grown);
            if (!tmp) {
                free(*string);
                *string = NULL;
                return NULL;
            }
            *string = tmp;
            size = grown;
        }
        if (at_end) {
            (*string)[index] = '\0';
            break;
        }
        (*string)[index++] = (char)ch;
    }
    return *string;
}
How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
// Change this to change the separator characters
/* Returns non-zero when ch is whitespace or punctuation. */
static inline char isSeparator(char ch) {
    /* <ctype.h> functions need an unsigned char value; plain char may be negative. */
    const unsigned char uch = (unsigned char)ch;
    return isspace(uch) || ispunct(uch);
}
/* Returns the first position at or after string that is not a separator (possibly the terminator). */
char * jumpSeparator(char *string) {
    while (*string && isSeparator(*string)) string++;
    return string;
}
/* Returns the first position at or after string that is a separator or the terminator. */
char * findEndOfWord(char *string) {
    while (*string && !isSeparator(*string)) string++;
    return string;
}
/*
 * Counts the words in string, i.e. the maximal runs of non-separator
 * characters. Leading and trailing separators do not add to the count.
 */
int countWords(char *string) {
    int count = 0;
    char *ptr = jumpSeparator(string);

    /* ptr always rests on the start of a word or on the terminator. */
    while (*ptr) {
        count++;
        ptr = jumpSeparator(findEndOfWord(ptr));
    }
    return count;
}
/* Counts the words of a sample sentence and prints the number. */
int main() {
    char *sentence = "This is,a function... to||count words";

    printf("%d\n", countWords(sentence)); /* ====> 7 */
}
EDIT : Reusing the same functions here is another example that allocates substrings dynamically :
/*
 * Splits a sample sentence into separately allocated words, prints them one
 * per line and frees them. Returns 1 if the pointer table cannot be allocated.
 */
int main() {
    char * sentence = "This is,a function... to||split words";
    int count = countWords(sentence);
    if (count <= 0) return 0;

    char ** substrings = malloc((size_t)count * sizeof *substrings);
    if (!substrings) return 1;

    char * ptr = sentence;
    int stored = 0;   /* number of slots actually filled */

    /* Never write past the table, even if the count and the scan disagree. */
    while (stored < count && *(ptr = jumpSeparator(ptr))) {
        char * start = ptr;
        ptr = findEndOfWord(ptr);
        size_t len = (size_t)(ptr - start);
        char * newString = malloc(len + 1);
        if (!newString) break;
        memcpy(newString, start, len);
        newString[len] = 0;
        substrings[stored++] = newString;
    }
    // Prints the result
    for(int i=0; i<stored; i++) printf("%s\n", substrings[i]);
    // Frees the allocated memory
    for(int i=0; i<stored; i++) free(substrings[i]);
    free(substrings);
    return 0;
}
Output :
This
is
a
function
to
split
words

How can I split a char* into substrings in C?

I have a text like this:
char* str="Hi all.\nMy name is Matteo.\n\nHow are you?"
and I want to split the string by "\n\n" in to an array like this:
char* array[3];
array[0]="Hi all.\nMy name is Matteo."
array[1]="How are you?"
array[2]=NULL
I've tried the strtok function but it does not split the string correctly.
#include <stdio.h>
#include <string.h>
/*
 * Splits the sample text at every blank line ("\n\n") into at most
 * sizeof(array)/sizeof(char*) parts and prints every slot; unused slots are
 * NULL and are shown as (null).
 */
int main(){
    char *str="Hi all.\nMy name is Matteo.\n\nHow are you?";
    /* Start with every slot NULL so slots after the last part are well defined. */
    char *array[3] = { NULL };
    char *ptop, *pend;
    /* Working copy; for arbitrary input allocate strlen(str)+3 bytes instead. */
    char wk[1024];
    int i, size = sizeof(array)/sizeof(char*);
    /*
    array[0]="Hi all.\nMy name is Matteo."
    array[1]="How are you?"
    array[2]=NULL
    */
    if (strlen(str) + 3 > sizeof wk)   /* text + "\n\n" + terminator must fit */
        return 1;
    strcpy(wk, str);
    strcat(wk, "\n\n");                /* guarantees that the last part is terminated too */
    for(i=0, ptop=wk;i<size;++i){
        pend=strstr(ptop, "\n\n");
        if(pend==NULL)
            break;                     /* remaining slots stay NULL */
        *pend='\0';
        /* The parts live inside wk, so nothing has to be allocated or freed. */
        array[i]=ptop;
        ptop=pend+2;
    }
    for(i = 0;i<size;++i)
        /* Passing NULL to %s is undefined, so spell the placeholder out. */
        printf("array[%d]=\"%s\"\n", i, array[i] ? array[i] : "(null)");
    return 0;
}
The strtok() function works on a set of single character delimiters. Your goal is to split by a two character delimiter, so strtok() isn't a good fit.
You could scan your input string via a loop that used strchr to find newlines and then checked to see if the next char was also a newline.
A more generic method based on strstr function:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Splits the sample text at every "\n\n" into a dynamically grown array of
 * NUL-terminated copies, prints them and frees everything.
 */
int main(void) {
    const char *str = "Hi all.\nMy name is Matteo.\n\nHow are you?\n\nThanks";
    char **result = NULL;
    unsigned int index = 0;
    unsigned int i = 0;
    const char *pstr = str;

    while (pstr) {
        const char *ptr = strstr(pstr, "\n\n");
        /* Length of this part: up to the separator, or to the end of the text. */
        const size_t size = ptr ? (size_t)(ptr - pstr) : strlen(pstr);

        /* Keep the old table if growing fails. */
        char **grown = realloc(result, (index + 1) * sizeof *grown);
        if (grown == NULL)
            break;
        result = grown;

        result[index] = malloc(size + 1);   /* +1 for the terminator */
        if (result[index] == NULL)
            break;
        memcpy(result[index], pstr, size);
        result[index][size] = '\0';
        index++;

        pstr = ptr ? ptr + 2 : NULL;
    }
    for (i = 0; i < index; i++) {
        printf("Array[%u] : >%s<\n", i, result[i]);
        free(result[i]);
    }
    free(result);
    return 0;
}

Using strtok in c

I need to use strtok to read in a first and last name and separate them. How can I store the names so that I can use them independently in two separate char arrays?
#include <stdio.h>
#include <string.h>
/* Prints each space-separated word of the sample string on its own line. */
int main ()
{
    char str[] = "test string.";
    char *test;

    for (test = strtok(str, " "); test != NULL; test = strtok(NULL, " ")) {
        printf("%s\n", test);
    }
    return 0;
}
Here is my take at a reasonably simple tokenize helper that
stores results in a dynamically growing array
null-terminating the array
keeps the input string safe (strtok modifies its input, which is undefined behaviour when the input is a string literal)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Returns a heap copy of the NUL-terminated string text, or NULL if out of memory. */
static char *dup_token(const char *text)
{
    const size_t bytes = strlen(text) + 1;
    char *copy = malloc(bytes);
    if (copy != NULL)
        memcpy(copy, text, bytes);
    return copy;
}

/*
 * Splits input at spaces.
 *
 * Returns a heap-allocated, NULL-terminated array of heap-allocated tokens;
 * the caller frees each token and then the array. The input is not modified.
 * Returns NULL when input is NULL or an allocation fails (nothing is leaked
 * in that case).
 */
char** tokenize(const char* input)
{
    if (input == NULL)
        return NULL;

    /* strtok writes into its argument, so tokenise a private copy. */
    char* str = dup_token(input);
    if (str == NULL)
        return NULL;

    int count = 0;
    int capacity = 10;
    char** result = malloc((size_t)capacity * sizeof(*result));
    if (result == NULL) {
        free(str);
        return NULL;
    }

    for (char* tok = strtok(str, " "); tok != NULL; tok = strtok(NULL, " "))
    {
        /* Always keep one slot free for the terminating NULL. */
        if (count + 1 >= capacity) {
            char** grown = realloc(result, (size_t)capacity * 2 * sizeof(*grown));
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity *= 2;
        }
        result[count] = dup_token(tok);
        if (result[count] == NULL)
            goto fail;
        ++count;
    }
    result[count] = NULL;
    free(str);
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    free(str);
    return NULL;
}
/* Tokenizes a sample string, prints each token on its own line and frees everything. */
int main ()
{
    char **tokens = tokenize("test string.");

    if (tokens != NULL) {
        size_t at = 0;
        /* The array ends with a NULL entry. */
        while (tokens[at] != NULL) {
            printf("%s\n", tokens[at]);
            free(tokens[at]);
            ++at;
        }
    }
    free(tokens);
    return 0;
}
Here is a strtok-free reimplementation of that (uses strpbrk instead):
/* Returns a NUL-terminated heap copy of the len bytes at start, or NULL if out of memory. */
static char *dup_span(const char *start, size_t len)
{
    char *copy = malloc(len + 1);
    if (copy != NULL) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

/*
 * Splits str at every single space without modifying it.
 *
 * Returns a heap-allocated, NULL-terminated array of heap-allocated tokens;
 * the caller frees each token and then the array. Consecutive spaces produce
 * empty tokens, a space at the very end does not, and a NULL str yields an
 * array that holds only the terminating NULL. Returns NULL if an allocation
 * fails (nothing is leaked in that case).
 */
char** tokenize(const char* str)
{
    int count = 0;
    int capacity = 10;
    char** result = malloc((size_t)capacity * sizeof(*result));
    if (result == NULL)
        return NULL;

    const char* s = str;
    while (s != NULL)
    {
        const char* e = strpbrk(s, " ");

        /* Always keep one slot free for the terminating NULL. */
        if (count + 1 >= capacity) {
            char** grown = realloc(result, (size_t)capacity * 2 * sizeof(*grown));
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity *= 2;
        }
        result[count] = dup_span(s, e ? (size_t)(e - s) : strlen(s));
        if (result[count] == NULL)
            goto fail;
        ++count;

        /* Stop at the last token, or when the text ends right after a space. */
        if (e == NULL || e[1] == '\0')
            break;
        s = e + 1;
    }
    result[count] = NULL;
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    return NULL;
}
Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.
That is we transform this:
char str[] ="test string.";
Into this:
char str[] ="test\0string.";
^ ^
| |
char *s1 ----- |
char *s2 -----------
.
#include <stdio.h>
#include <string.h>
/* Splits "first last" in place and prints it as "last, first". */
int main ()
{
    char str[] = "test string.";
    /* Both pointers refer into str; strtok terminates the first word in place. */
    char *firstname = strtok(str, " ");
    char *second = strtok(NULL, " ");
    char *lastname = second ? second : "";

    printf("%s, %s\n", lastname, firstname);
    return 0;
}
What about using strcpy:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_NAMES 2
/*
 * Copies up to MAX_NAMES space-separated tokens of the sample string into
 * individually allocated strings, prints them and frees them.
 * Returns EXIT_FAILURE if a copy could not be allocated.
 */
int main ()
{
    char str[] ="test string.";
    char *names[MAX_NAMES] = { 0 };
    char *test;
    int i = 0;
    int status = 0;

    test = strtok (str," ");
    while (test != NULL && i < MAX_NAMES)
    {
        names[i] = malloc(strlen(test)+1);
        if (names[i] == NULL)
        {
            /* Never strcpy into a failed allocation; report and stop collecting. */
            perror("malloc");
            status = EXIT_FAILURE;
            break;
        }
        strcpy(names[i++], test);
        test = strtok (NULL, " ");
    }
    for(i=0; i<MAX_NAMES; ++i)
    {
        if(names[i])
        {
            puts(names[i]);
            free(names[i]);
            names[i] = 0;
        }
    }
    return status;
}
It contains much clutter to maintain a complete program and clean its resources, but the main point is to use strcpy to copy each token into its own string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Splits str into tokens separated by any character of delimiter.
 *
 * Returns a heap-allocated, NULL-terminated array of pointers into ONE
 * private copy of the text and stores the token count in *len. The copy
 * starts at the first token, so element 0 is also the start of that buffer;
 * free4split() relies on this and releases both allocations. When str holds
 * no token the array contains only the terminating NULL. Returns NULL, with
 * *len set to 0, if an allocation fails.
 */
char** split(const char *str, const char *delimiter, size_t *len){
    *len = 0;

    /* Skip leading delimiters so that the copy begins with the first token. */
    const char *start = str + strspn(str, delimiter);
    const size_t bytes = strlen(start) + 1;
    char *text = malloc(bytes);
    if(text==NULL) return NULL;
    memcpy(text, start, bytes);

    /* First pass: count the tokens without touching the copy. */
    size_t c = 0;
    for(const char *p = start; *p != '\0'; ){
        p += strcspn(p, delimiter);   /* token body */
        ++c;
        p += strspn(p, delimiter);    /* separators after it */
    }

    /* c token pointers plus one slot for the terminating NULL. */
    char **ret = malloc(sizeof *ret * (c + 1));
    if(ret==NULL){
        free(text);
        return NULL;
    }
    if(c==0){
        /* No token: nothing points into text, so release it right here. */
        free(text);
        ret[0]=NULL;
        return ret;
    }

    /* Second pass: cut the copy into tokens in place. */
    char **array = ret;
    for(char *p = strtok(text, delimiter); p != NULL; p = strtok(NULL, delimiter)){
        *array++=p;
    }
    *array=NULL;
    *len=c;
    return ret;
}
/*
 * Releases a result of split(): element 0 (the text buffer) and then the
 * pointer array itself. A NULL argument is ignored.
 */
void free4split(char** sa){
    if(sa==NULL)
        return;
    free(*sa);   /* for text */
    free(sa);    /* for array */
}
/* Splits a sample string at whitespace, commas and periods and prints each word. */
int main(void){
    char str[] ="test string.";
    size_t len=0;
    char **words = split(str, " \t\r\n,.", &len);

    if(words==NULL){
        fputs("split failed\n", stderr);
        return 1;
    }
    /* The array is also NULL-terminated, so `for(wk = words; *wk; wk++)` works as well. */
    for(size_t i = 0;i<len;++i){
        printf("%s\n", words[i]);
    }
    free4split(words);
    return 0;
}
/* result:
test
string
*/
Copy the results from strtok to a new buffer using a function such as
/*
 * Duplicates the NUL-terminated string s into newly allocated memory and
 * returns the copy; the caller frees it.
 * If the allocation fails, a message is printed with perror() and the
 * process exits with status 1.
 */
char *xstrdup(char const *s)
{
    const size_t bytes = strlen(s) + 1;   /* text plus terminator */
    char *copy = malloc(bytes);

    if (copy == NULL) {
        perror("memory allocation failed");
        exit(1);
    }
    memcpy(copy, s, bytes);
    return copy;
}
Don't forget to free the return values when you're done with them.
IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns a NUL-terminated heap copy of the characters in [begin, end),
 * or NULL if the allocation fails. The caller frees the result.
 */
static char *make_str(char const *begin, char const *end) {
    size_t len = (size_t)(end - begin);
    char *ret = malloc(len + 1);
    if (ret != NULL) {
        memcpy(ret, begin, len);
        ret[len] = '\0';
    }
    return ret;
}

/*
 * Splits input into tokens separated by any character of delims.
 *
 * Stores at most max heap-allocated tokens in tokens[] and returns how many
 * were stored; the caller frees each one. Runs of delimiters, including
 * leading and trailing ones, never produce empty tokens. input is not
 * modified. Stops early if a token cannot be allocated.
 */
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) {
    size_t count = 0;
    char const *start = input;

    while (count < max) {
        /* strspn stops at the terminator, so the scan cannot run off the end. */
        start += strspn(start, delims);
        if (*start == '\0')
            break;

        char const *end = start + strcspn(start, delims);
        tokens[count] = make_str(start, end);
        if (tokens[count] == NULL)
            break;
        ++count;

        /* Continue at the delimiter (or terminator), never beyond it. */
        start = end;
    }
    return count;
}
#ifdef TEST
#define MAX_TOKENS 10
/* Tokenizes a sample sentence, prints each token between bars and frees it. */
int main() {
    char *tokens[MAX_TOKENS];
    size_t num = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");
    int i = 0;

    while (i < num) {
        printf("|%s|\n", tokens[i]);
        free(tokens[i]);
        i++;
    }
    return 0;
}
#endif
You can do something like this too.
/*
 * Splits the sample string at its first space: temp1 is the first word,
 * temp2 is everything after the space ("" when there is no space).
 */
int main ()
{
    char str[] ="test string.";
    char * temp1;
    char * temp2;

    /*
     * Find the space BEFORE calling strtok: strtok replaces it with '\0',
     * after which strchr could no longer find it. Leading spaces are skipped
     * first because strtok skips them as well.
     */
    temp2 = strchr(str + strspn(str, " "), ' ');
    temp1 = strtok (str," ");
    if (temp2 != NULL)
        temp2++;
    /* Passing NULL to %s is undefined, so fall back to an empty string. */
    printf ("Splitted string :%s, %s\n" , temp1 ? temp1 : "" , temp2 ? temp2 : "");
    return 0;
}

Resources