C - Split a string at the whitespaces - c

I need to split a string at the spaces (example string: "Hello this is an example string.") into an array of words. I'm not sure what I'm missing here, and I'm also curious what the best way to test this function is. The only library function allowed is malloc.
Any help is appreciated!
#include <stdlib.h>
/*
 * ft_split (as posted in the question): meant to split str at spaces and
 * return an array of newly allocated words.
 * NOTE(review): wordbank is never pointed at any storage and wordcount is
 * never initialized before it is used as an index.
 */
char **ft_split(char *str) {
int wordlength;
int wordcount;
char **wordbank;
int i;
int current;
current = 0;
wordlength = 0;
// loop over the sentence
while (str[wordlength] != '\0') {
// advance current past spaces to the next letter
while (str[current] == ' ')
current++;
// scan to the next space; note the scan restarts from index 0 on every pass
wordlength = 0;
while (str[wordlength] != ' ' && str[wordlength] != '\0')
wordlength++;
// allocate memory for the word (wordbank itself has no storage yet)
wordbank[wordcount] = malloc(sizeof(char) * (wordlength - current + 1));
i = 0;
// copy the characters into the current wordbank entry
while (i < wordlength - current) {
wordbank[wordcount][i] = str[current];
i++;
current++;
}
// end the word with '\0'
wordbank[wordcount][i] = '\0';
wordcount++;
}
return wordbank;
}

There are multiple problems in your code:
You do not allocate an array for wordbank to point to, dereferencing an uninitialized pointer has undefined behavior.
Your approach to scanning the string is broken: you reset wordlength inside the loop so you keep re-scanning from the beginning of the string.
You should allocate an extra entry in the array for a trailing null pointer to indicate the end of the array to the caller.
Here is a modified version:
#include <stdlib.h>
/*
 * Split str at space characters (' ') into an array of words.
 *
 * Returns a malloc()ed array of malloc()ed, NUL-terminated copies of each
 * word, followed by a NULL pointer marking the end of the array. An input
 * with no words yields an array holding only the NULL terminator.
 * Returns NULL if any allocation fails; nothing is leaked in that case.
 * The caller owns the result and must free every word and then the array.
 */
char **ft_split(const char *str) {
    size_t i, j, k, n, wordcount;
    char **wordbank;
    char *p;

    // count the words: a word starts at a non-space character that is
    // either the first character or is preceded by a space
    wordcount = 0;
    for (i = 0; str[i]; i++) {
        if (str[i] != ' ' && (i == 0 || str[i - 1] == ' ')) {
            wordcount++;
        }
    }
    // allocate the word array, plus one slot for the trailing NULL
    wordbank = malloc((wordcount + 1) * sizeof(*wordbank));
    if (wordbank == NULL) {
        return NULL;
    }
    for (i = k = 0;;) {
        // skip spaces
        while (str[i] == ' ')
            i++;
        // check for end of string
        if (str[i] == '\0')
            break;
        // scan for end of word: the word occupies str[j] .. str[i - 1]
        for (j = i++; str[i] != '\0' && str[i] != ' '; i++)
            continue;
        // allocate space for the word copy
        p = malloc(i - j + 1);
        if (p == NULL) {
            // allocation failed: free the words stored so far and return NULL
            while (k-- > 0) {
                free(wordbank[k]);
            }
            free(wordbank);
            return NULL;
        }
        // copy the word by hand (malloc/free are the only library calls used)
        for (n = 0; n < i - j; n++) {
            p[n] = str[j + n];
        }
        p[n] = '\0';
        // store the word and advance to the next slot
        wordbank[k++] = p;
    }
    // set a null pointer at the end of the array
    wordbank[k] = NULL;
    return wordbank;
}

You need to malloc() wordbank too. You can count the number of words, and then
wordbank = malloc((count + 1) * sizeof(*wordbank));
if (wordbank == NULL)
return NULL;
Note: sizeof(char) is 1 by definition. And sizeof *pointer is always what you want.

Related

How to shrink string by using Realloc

First time asking a question here:
well I need to take the original string
and remove the spaces and numbers from the string
I need to use the exact amount of memory.
For some reason, the string is fine in the beginning
but then it prints garbage values:
original string: "abcd2 34fty 78 jurt#"
what needed to be done: abcdftyjurt#
My code:
#define _CRT_SECURE_NO_WARNINGS
#include <malloc.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/* Function declarations */
/*-------------------------------------------------------------*/
void Ex1();
char* clearDigitsAndSpaces(char*);
/*-------------------------------------------------------------*/
void Ex2();
/*-------------------------------------------------------------*/
void Ex3();
/*-------------------------------------------------------------*/
/* Declarations of other functions */
/*
 * Menu driver: asks whether the menu should run once or repeatedly, then
 * reads a selection in the range 0-3 and dispatches to Ex1/Ex2/Ex3.
 */
int main() {
int select = 0, i, all_Ex_in_loop = 0;
printf("Run menu once or cyclically?\n(Once - enter 0, cyclically - enter other number) ");
/* the whole menu loop runs only if the first number was read successfully */
if (scanf("%d", &all_Ex_in_loop) == 1)
do {
for (i = 1; i <= 3; i++)
printf("Ex%d--->%d\n", i, i);
printf("EXIT-->0\n");
/* keep prompting until the selection is within 0..3 */
do {
select = 0;
printf("please select 0-3 : ");
scanf("%d", &select);
} while ((select < 0) || (select > 3));
switch (select) {
case 1: Ex1(); break;
case 2: Ex2(); break;
case 3: Ex3(); break;
}
/* repeat only in cyclic mode and while the user did not choose 0 (exit) */
} while (all_Ex_in_loop && select);
return 0;
}
/* Function definitions */
/* Exercise 1: filter a fixed sample string through clearDigitsAndSpaces, print the result, and free it. */
void Ex1() {
char input[] = "abcd2 34fty 78 jurt#";
char *temp = NULL;
temp = clearDigitsAndSpaces(input);
printf("%s\n ", temp);
free(temp);
}
/*
 * clearDigitsAndSpaces (as posted in the question): copies every character
 * of old_string that is neither a space nor a digit '0'..'9' into a newly
 * allocated buffer and returns that buffer.
 * NOTE(review): the buffer is allocated with strlen(old_string) bytes and no
 * '\0' is ever written to it.
 */
char *clearDigitsAndSpaces(char *old_string) {
char *new_string;
int count = 0;
int i = 0;
int j = 0;
int size = strlen(old_string);
new_string = (char *)malloc(size * sizeof(char));
assert(new_string); /*Memory allocation check*/
while (old_string[i]) {
if (old_string[i] != ' ' && (old_string[i] > '9' || old_string[i] < '0')) {
new_string[j++] = old_string[i];
} else {
/* size itself is never reduced, so every realloc requests size - 1 bytes */
//size -= 1;
new_string = (char *)realloc(new_string, size - 1);
}
i++;
}
assert(new_string);
//printf("%s", new_string);
return new_string;
}
/* Exercise 2: empty placeholder. */
void Ex2() {
}
/* Exercise 3: empty placeholder. */
void Ex3() {
}
The problem in your code is you must allocate one extra byte for the null terminator.
You can avoid using realloc() by first scanning the source string to determine the allocation size and then use a separate loop to copy the contents:
/*
 * Return a newly allocated copy of src with every space and every decimal
 * digit ('0'..'9') removed. The buffer is sized exactly: one byte per kept
 * character plus one for the terminating '\0'.
 * Returns NULL if the allocation fails. The caller frees the result.
 */
char *clearDigitsAndSpaces(const char *src) {
    const char *scan;
    size_t kept = 0;

    /* first pass: count the characters that survive the filter */
    for (scan = src; *scan != '\0'; scan++) {
        if (*scan == ' ' || (*scan >= '0' && *scan <= '9'))
            continue;
        kept++;
    }

    char *result = malloc(kept + 1);
    if (result == NULL)
        return NULL;

    /* second pass: copy the surviving characters */
    char *out = result;
    for (scan = src; *scan != '\0'; scan++) {
        if (*scan == ' ' || (*scan >= '0' && *scan <= '9'))
            continue;
        *out++ = *scan;
    }
    *out = '\0';
    return result;
}
Firstly: you need to understand the difference between the length of a C-string and the size of a C-string. The length does not include the null terminator. The size does. So this snippet:
int size = strlen(old_string);
new_string = (char*)malloc(size * sizeof(char));
needs to be
int size = strlen(old_string) + 1;
new_string = (char*)malloc(size * sizeof(char));
(note that if you're using Unicode in Windows, with wchar_t instead of char, then the size in bytes is twice the length, plus 2 - each character is two bytes, as well as the null terminator aka 'sentinel')
Secondly: I would suggest you use parenthesis to be explicit about intention. It may not be "absolutely necessary", but there would be no doubt about the intention when someone else reads your code. Also avoid indexing the same thing repeatedly. Change:
if (old_string[i]!=' ' && (old_string[i] > '9' || old_string[i]< '0'))
to:
char oldChar = old_string[i];
if ((oldChar != ' ')
&& ((oldChar > '9') || (oldChar < '0'))
)
Finally, you need to emplace a null character at the end. You don't need to realloc; just use not all of the buffer. Change:
new_string = (char*)realloc(new_string, size-1);
to:
new_string[j++] = '\0';
// PS: if you really want to realloc, then add "new_string = (char*)realloc(new_string, j);" after writing the null character.
Also - if you change the malloc to a calloc, you won't need to write a null terminator, since the entire buffer would be nulled before you copied anything to it.
Furthermore, I would add a defensive limit check to i in the while loop to ensure it cannot go on ad-infinitum.

Sorting words in alphabetically order C

So my exercise is to sort words in 1D char array. My code is almost working, but it always skips the last char of the last word. Here is my code. I've added some comments to make it somehow readable. I know it's not brilliant code but I've just started with programming.
/* Sorts the words of a fixed sample sentence in place and prints the result. */
int main(void) {
char input[] = "If you are working on something that you really care about you dont have to be pushed The vision pulls you Steve Jobs";
sort_alphabetically(input);
printf("%s", input);
}
/*
 * sort_alphabetically (as posted in the question): splits tab into words,
 * bubble-sorts them with strcmp, and writes them back into tab separated by
 * single spaces.
 * NOTE(review): declared to return int, but no return statement is present.
 */
int sort_alphabetically(char tab[]) {
int j = 0, k = 0, i = 0, g = 0, f = 0, l = 0;
char tmp[1001];
char tmp2[501][1001];
while (tab[i] == ' ') // skipping leading whitespace
i++;
for (j = i; tab[j] != '\0'; j++) {
if (tab[j] != ' ' && tab[j + 1] != '\0')
k++; // counting word length
else if (tab[j] == ' ' || tab[j + 1] == '\0' || tab[j + 1] == '\0') {
// copying the word to the 2D array: the k characters tab[j-k] .. tab[j-1]
for (g = k; g > 0; g--) {
tmp[l] = tab[j - g];
l++;
}
tmp[l] = 0;
strcpy(tmp2[f], tmp); // copying
f++; // one more word stored in tmp2
k = 0;
l = 0;
tmp[0] = 0;
}
}
tab[0] = 0;
tmp[0] = 0;
for (j = 0; j < f; j++) {
for (i = 0; i < f - 1; i++) {
if (strcmp(tmp2[i], tmp2[i + 1]) > 0) { // sorting words in alphabetical order
strcpy(tmp, tmp2[i]);
strcpy(tmp2[i], tmp2[i + 1]);
strcpy(tmp2[i + 1], tmp);
}
}
}
for (i = 0; i < f; i++) {
strcat(tab, tmp2[i]); // copying to tab
strcat(tab, " "); // adding a space after each word
}
// removing leading whitespace by shifting the contents to the front
for (i = 0; tab[i] == ' ' || tab[i] == '\t'; i++);
for (j = 0; tab[i]; i++) {
tab[j++] = tab[i];
}
tab[j] = '\0';
}
;
After running this code it cuts the s in last word (Jobs). If someone can help me with this spaghetti I would be so happy.
The problem was with how you were handling the null byte vs the space. In the space case, you were actually on the space when you copied the string. But in the null byte case, you were one before the null byte. This leads to an off-by-one error. You need to modify the code to avoid handling it differently for spaces and null bytes:
for (j = i; tab[j] != '\0'; j++) {
//In the space case, you are on the space, but in the \0 case
//you were one before it.
//Changed this if statement so that you always copy the string
//when you're at the last character.
if (tab[j + 1] == ' ' || tab[j + 1] == '\0') {
//k is a length, but we're using it as an index
//so we will need to adjust by one
for (g = k; g > 0; g--) {
tmp[l] = tab[j - g + 1];
l++;
}
}
else
{
k++;
}
}
I worked this out by putting print statements that showed me the value of tab[j] and the value of k at each cycle. Watching your program execute, either with print statements or a debugger, is usually the best way to diagnose these sorts of issues.
The problem you have is in copying characters to the tmp buffer when you reach the end of the input (tab) string; that is, when tab[j + 1] == '\0' is true. In this case, you aren't copying the last character in this for loop:
for (g = k; g > 0; g--) {
tmp[l] = tab[j - g];
l++;
}
To fix the issue, simply change the loop's 'condition' to include when g is zero, and skip this 'iteration' when you encounter a space character:
for (g = k; g >= 0; g--) { // Make sure to include any 'last' character
if (tab[j - g] != ' ') { // ... but skip if this is a space
tmp[l] = tab[j - g];
l++;
}
}
Note also that you have a redundant test in this line:
else if (tab[j] == ' ' || tab[j + 1] == '\0' || tab[j + 1] == '\0') {
which could just as well be written without the third test (which is the same as the second), thus:
else if (tab[j] == ' ' || tab[j + 1] == '\0') {
Caveat: Most of the other responders have pointed out the major bugs in your code, but this has some smaller ones and some simplification.
Before doing strcat back to tab, we should do tab[0] = 0 so the initial strcat works correctly.
Doing strcat(tab," ") after the one that copies the word goes one beyond the end of tab and is, therefore, undefined behavior. It also requires an unnecessary cleanup loop to remove the extra space that should not have been there in the first place.
The initial "split into words" loop can be [greatly] simplified.
There are some standard speedups to the bubble sort
I realize that you're just starting out [and some schools actually advocate for i, j, etc], but it's better to use some [more] descriptive names
Anyway, here's a somewhat refactored version:
#include <stdio.h>
#include <string.h>
/* Debug switch: non-zero enables the output produced by dbg(). */
int opt_dbg;

/*
 * dbg(fmt, ...): printf-style debug output, emitted only when opt_dbg is
 * non-zero. Wrapped in do { } while (0) so the macro behaves as a single
 * statement (an unbraced `if` would capture a following `else`), and written
 * with the standard __VA_ARGS__ form instead of the GNU named-variadic form.
 */
#define dbg(...) \
    do { \
        if (opt_dbg) \
            printf(__VA_ARGS__); \
    } while (0)
/*
 * Sort the space/tab-separated words of tab into ascending strcmp() order,
 * in place. The words are written back separated by single spaces, with no
 * leading or trailing space.
 *
 * Capacity: the words are staged in a fixed words[501][1001] array and no
 * bounds checking is done, so the input must stay within those limits.
 */
void
sort_alphabetically(char tab[])
{
    char tmp[1001];
    char words[501][1001];
    char *src;
    char *dst;
    char *beg;
    int chr;
    int wordidx;
    int wordcnt;

    wordidx = 0;
    dst = words[wordidx];
    beg = dst;

    // split up string into individual words
    src = tab;
    for (chr = *src++; chr != 0; chr = *src++) {
        switch (chr) {
        case ' ':
        case '\t':
            // wait until we've seen a non-white char before we start a new
            // word
            if (dst <= beg)
                break;

            // finish prior word
            *dst = 0;

            // point to start of next word
            dst = words[++wordidx];
            beg = dst;
            break;

        default:
            *dst++ = chr;
            break;
        }
    }

    // finish last word
    *dst = 0;

    // get number of words: the current slot only counts if it received
    // characters. After trailing whitespace (or for an empty input) it is
    // empty and must not be sorted in, or it would surface as a leading
    // space in the result.
    wordcnt = (dst > beg) ? wordidx + 1 : wordidx;

    if (opt_dbg) {
        for (wordidx = 0; wordidx < wordcnt; ++wordidx)
            dbg("SPLIT: '%s'\n",words[wordidx]);
    }

    // in bubble sort, after a given pass, the _last_ element is guaranteed to
    // be the largest, so we don't need to examine it again
    for (int passlim = wordcnt - 1; passlim >= 1; --passlim) {
        int swapflg = 0;

        // sorting words in alphabetical order
        for (wordidx = 0; wordidx < passlim; ++wordidx) {
            char *lhs = words[wordidx];
            char *rhs = words[wordidx + 1];

            if (strcmp(lhs,rhs) > 0) {
                dbg("SWAP/%d: '%s' '%s'\n",passlim,lhs,rhs);
                strcpy(tmp,lhs);
                strcpy(lhs,rhs);
                strcpy(rhs,tmp);
                swapflg = 1;
            }
        }

        // if nothing got swapped, we can stop early (i.e. everything is in
        // sort)
        if (! swapflg)
            break;
    }

    // clear out destination so [first] strcat will work
    tab[0] = 0;

    // copy back words into original string
    // adding the space as a _prefix_ before a word eliminates the need for a
    // cleanup to remove the last space
    for (wordidx = 0; wordidx < wordcnt; ++wordidx) {
        dbg("SORTED: '%s'\n",words[wordidx]);

        // adding spaces before each word
        if (wordidx > 0)
            strcat(tab, " ");

        // copying to tab
        strcat(tab,words[wordidx]);
    }
}
/*
 * Driver: a leading "-d" option toggles debug output (option parsing stops
 * at the first argument that does not start with '-'), then the sample
 * sentence is sorted and printed.
 */
int
main(int argc,char **argv)
{
    char input[] = "If you are working on something that you really care"
        " about you dont have to be pushed The vision pulls you Steve Jobs";

    // argv[0] is the program name, so options start at index 1
    for (int argi = 1; argi < argc; ++argi) {
        const char *opt = argv[argi];

        if (opt[0] != '-')
            break;

        if (opt[1] == 'd')
            opt_dbg = ! opt_dbg;
    }

    sort_alphabetically(input);
    printf("%s\n", input);

    return 0;
}

Function and strings

I'm new to C and have started learning about strings. I want to create a function called
void SpaceRemover(char *input )
It should remove the extra spaces from a given string that contains a lot of spaces.
The code that I've produced so far removes all the spaces and doesn't provide the output I'm looking for. Can anyone help me with this?
/*
 * SpaceRemover (as posted in the question): compacts input in place by
 * copying every non-space character towards the front, and returns input.
 * NOTE(review): no '\0' is written after the last copied character.
 */
char* SpaceRemover(char *input){
char *output=input;
for (int i = 0, j = 0; i<strlen(input); i++,j++)
{
if (input[i]!=' ')
output[j]=input[i];
else
j--; /* cancels the j++ of the for statement, so j only advances on a copy */
}
return output;
}
I made this, but I know it's wrong and does not do what I want it to. Honestly, this is all I could think of -_-
You could just do as below
/*
 * Collapse runs of spaces in name, in place.
 * A space is kept only when the next character is not a space and at least
 * one character has already been kept; every non-space character is kept.
 * The result is NUL-terminated.
 */
void SpaceRemover(char *name)
{
    char *out = name;   /* next write position; out == name means nothing kept yet */

    for (const char *in = name; *in != '\0'; in++)
    {
        /* drop a space that is followed by another space, or that would
           become the first character of the result */
        if (*in == ' ' && (in[1] == ' ' || out == name))
            continue;
        *out++ = *in;
    }
    *out = '\0'; //Terminate the string to avoid junk chars
}
Where
if (name[i] != ' ' || (name[i] == ' ' && name[i+1] != ' ' && j != 0))
will let you copy only if current char is not space or current char is space and next char is not space(to include single space apart in the beginning).
Also don't forget to terminate the string.
name[j]='\0';
The problem is that you remove all spaces.
/*
 * Collapse every run of spaces in name down to a single space, in place,
 * and return name. The last space of each run is the one kept, so a run at
 * the very start or end of the string still leaves one space there.
 */
char *SpaceRemover(char *name){
    char *output = name;
    size_t j = 0;

    for (size_t i = 0; name[i] != '\0'; i++) {
        /* keep non-spaces, and a space only when it is the last of its run */
        if (name[i] != ' ' || name[i + 1] != ' ') {
            output[j] = name[i];
            j += 1;
        }
    }
    output[j] = '\0';
    return output;
}
This condition should let one space through.
You might notice I replaced the void return type with a char *, so to use the function you will need to write:
name = SpaceRemover(name);
I would be using a flag to activate when a space is met.
This might need to be tweaked if you want to remove leading and trailing spaces too.
A space will be added to output and the flag will be used to avoid the next ones to be added. The flag will be deactivated when something else than a space is met.
As Alex stated in the comments, decrementing j inside the loop while it is incremented in the for statement isn't recommended.
I would copy each characters in the for loop instead of filtering a pre-copied output.
/* Copy name into a new buffer, keeping only the first space of each run of spaces. */
char space_found = 0; /* 1 while the most recently copied character was a space */
char *output = malloc(sizeof(char) * (strlen(name) + 1));
int j = 0;
for (int i = 0; i < strlen(name); ++i)
{
    /* first space of a run: copy it and remember that a space was emitted
       (C spells logical AND as &&; `and` needs <iso646.h>) */
    if (name[i] == ' ' && space_found == 0)
    {
        space_found = 1;
        output[j++] = name[i];
    }
    /* any non-space character is copied and ends the current run */
    if (name[i] != ' ')
    {
        space_found = 0;
        output[j++] = name[i];
    }
}
output[j] = '\0';
/*
 * Remove every space character from str, in place.
 * Compaction only ever writes at or before the position being read, so no
 * scratch copy is needed and strings of any length are handled.
 */
void spaceRemover(char* str)
{
    size_t j = 0;   /* next write position */

    for (size_t i = 0; str[i] != '\0'; i++)
    {
        if (str[i] != ' ')
        {
            str[j] = str[i];
            j++;
        }
    }
    str[j] = 0;
}
if you have any questions, feel free to ask, Good Luck
#include <stdio.h>
/*
 * Strip every space character from str in place and return str.
 * Uses a read index and a write index; the write index never passes the
 * read index.
 */
char *rem(char *str)
{
    size_t kept = 0;    /* number of characters written so far */

    for (size_t scan = 0; str[scan] != '\0'; scan++)
    {
        if (str[scan] != ' ')
            str[kept++] = str[scan];
    }
    str[kept] = 0;
    return str;
}
/* Demo: strip the spaces from a sample string and print the result. */
int main(void) {
    char z[]=" etc def etc def ";
    char *stripped = rem(z);

    printf("%s\n", stripped);
    return 0;
}

Split whitespaces in a string and store them in table in C without libraries

I had an assignment for my class last week where I had to split a string using spaces, tabs and \n as separators and store every "word" in an array. I think I'm very close, but my output is very weird, so it would be nice if someone could tell me what I forgot. The only constraint is that I can only use malloc.
/*
 * ft_split_whitespaces (as posted in the question): splits str at spaces,
 * tabs and newlines into a NULL-terminated array of newly allocated words.
 * Relies on ft_nb_words() and ft_len_word(), which are defined elsewhere.
 * NOTE(review): the result of the first malloc (tab) is not checked, and on
 * a failed word allocation the function returns without freeing anything.
 */
char **ft_split_whitespaces(char *str)
{
int i;
int j;
int k;
char **tab;
i = 0;
j = 0;
k = 0;
tab = (char**)malloc(sizeof(*tab) * (ft_nb_words(str) + 1));
while (str[i])
{
/* skip separators before the next word */
while (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')
i++;
if (str[i])
{
if ((tab[j] = (char*)malloc(sizeof(char) * (ft_len_word(str + i) + 1))) == NULL)
return (NULL);
/* i advances with every copied character, so ft_len_word(str + i) is
   re-evaluated on a shorter remainder each time */
while (k < ft_len_word(str + i))
tab[j][k++] = str[i++];
tab[j++][k] = '\0';
k = 0;
}
}
/* NULL marks the end of the array */
tab[j] = NULL;
return (tab);
}
The functions returning the length of a word and the number of words work fine so I think the problem comes from the main function.
This can be handled easily if you keep a pointer to the position just after the last occurrence of a separator character (' ', '\n' or '\t').
/*
 * Split str at spaces, tabs and newlines into a NULL-terminated array of
 * newly allocated, NUL-terminated words. prevToken tracks the start of the
 * token currently being scanned.
 *
 * Each stored word is also printed as "tab=<word>".
 * Returns NULL if an allocation fails; everything allocated so far is freed.
 * NOTE(review): the array is sized with ft_nb_words(str) + 1 entries, so
 * ft_nb_words() (defined elsewhere) is assumed to return the number of
 * whitespace-separated words -- confirm.
 */
char **ft_split_whitespaces(char *str)
{
    int i;
    int j;
    int k;
    int len;
    char **tab;
    char *prevToken=str;

    tab = (char**)malloc(sizeof(*tab) * (ft_nb_words(str) + 1));
    if (tab == NULL)
        return (NULL);
    i = 0;
    j = 0;
    for (;;)
    {
        /* a token ends at any separator and at the end of the string */
        if (str[i] == ' ' || str[i] == '\t' || str[i] == '\n' || str[i] == '\0')
        {
            len = (int)((str + i) - prevToken);
            /* consecutive separators give empty tokens: skip those */
            if (len > 0)
            {
                if ((tab[j] = (char*)malloc(sizeof(char) * (len + 1))) == NULL)
                {
                    while (j > 0)
                        free(tab[--j]);
                    free(tab);
                    return (NULL);
                }
                for (k = 0; k < len; k++)
                    tab[j][k] = prevToken[k];
                tab[j][k] = '\0';
                printf("tab=%s\n", tab[j]);
                j++;
            }
            if (str[i] == '\0')
                break;
            /* the next token can start right after this separator */
            prevToken = str + i + 1;
        }
        i++;
    }
    /* terminate the array after the last word instead of overwriting it */
    tab[j] = NULL;
    return (tab);
}
The following code contains an implementation of some useful C functions.
The function you search is strtok(). In the code are implemented also the functions strspn() and strpbrk() because strtok() uses them.
The best way to solve this kind of problems is to study the implementation of C standard functions.
The code stores the copies of max 100 token (the extracted words).
You have to remember that the function strtok() modifies the content of the source string inserting '\0' to terminate the strings found.
The functions here implemented are:
mystrtok()
mystrspn()
mystrpbrk()
The code:
#include <stdio.h>
#include <string.h> /* for the use of strcpy fn */
#include <malloc.h>
char * mystrtok (char * s, char * delim);
size_t mystrspn (const char *s, const char *accept);
char * mystrpbrk (const char *s, const char *accept);
/*
 * Return a pointer to the first character of s that also occurs in accept,
 * or NULL when s contains none of them.
 */
char * mystrpbrk (const char *s, const char *accept)
{
    for (; *s != '\0'; ++s)
    {
        for (const char *candidate = accept; *candidate != '\0'; ++candidate)
        {
            if (*candidate == *s)
                return (char *) s;
        }
    }
    return NULL;
}
/*
 * Return the length of the longest prefix of s made up only of characters
 * that occur in accept.
 */
size_t mystrspn (const char *s, const char *accept)
{
    size_t count = 0;

    while (s[count] != '\0')
    {
        const char *candidate = accept;

        /* look for s[count] among the accepted characters */
        while (*candidate != '\0' && *candidate != s[count])
            ++candidate;
        /* running off the end of accept means the prefix stops here */
        if (*candidate == '\0')
            break;
        ++count;
    }
    return count;
}
/*
 * Minimal strtok() replacement.
 *
 * Pass the string to tokenize on the first call and NULL on later calls to
 * continue with the same string. Leading delimiters are skipped, the
 * delimiter that ends the token is overwritten with '\0' (so s is
 * modified), and the token start is returned. Returns NULL when no token is
 * left. The scan position is kept in a static variable.
 */
char * mystrtok (char *s, char *delim)
{
    char *token;
    static char *olds;

    if (s == NULL) {
        s = olds;
    }
    /* continuation requested before any string was ever supplied */
    if (s == NULL) {
        return NULL;
    }
    /* Scan leading delimiters. */
    s += mystrspn (s, delim);
    if (*s == '\0')
    {
        olds = s;
        return NULL;
    }
    /* Find the end of the token. */
    token = s;
    s = mystrpbrk (token, delim);
    if (s == NULL)
    {
        /* This token finishes the string: park OLDS on the terminator.
           Start from the token itself, because OLDS may still be NULL or
           refer to a previously tokenized string. */
        olds = token;
        while(*olds)
            olds++;
    }
    else
    {
        /* Terminate the token and make OLDS point past it. */
        *s = '\0';
        olds = s + 1;
    }
    return token;
}
/*
 * Demo: tokenize a sample string with mystrtok() and keep a copy of every
 * token. All copies live back to back in one buffer (store[0]); store[i]
 * points at the i-th copy. At most MAX_TOKENS tokens are stored.
 */
int main(void)
{
    enum { MAX_TOKENS = 100 };   /* capacity of the pointer table */
    char str[] = "I have an orange\tYou have some bananas\nShe has three pineapples\n";
    char delims[] = " \n\t";     /* same delimiter set for every call */
    char * x = NULL;
    int cnt=0,i;
    char **store;

    /* Stores a max of MAX_TOKENS strings */
    store = malloc(sizeof(char *)*MAX_TOKENS);
    if (store == NULL)
        return 1;
    /* The total space for the tokens is
       max the entire string + '\0' */
    store[0] = malloc(strlen(str)+1);
    if (store[0] == NULL) {
        free(store);
        return 1;
    }
    /* Extract the first token */
    x=mystrtok(str,delims);
    while(x != NULL && cnt < MAX_TOKENS) {
        printf("Storing %s\n",x);
        /* Store a copy of the token */
        strcpy(store[cnt],x);
        /* the next copy starts right after this one's terminator;
           only record it while the table still has a slot */
        if (cnt + 1 < MAX_TOKENS)
            store[cnt+1]=store[cnt]+strlen(x)+1;
        cnt++;
        /* extract the next token */
        x=mystrtok(NULL,delims);
    }
    for(i=0;i<cnt;i++)
        printf("Stored %s\n",store[i]);
    /* every copy lives inside the single store[0] buffer */
    free(store[0]);
    free(store);
    return 0;
}
Your code is inefficient as you call ft_len_word far too many times, but it does not seem broken apart from the undefined behavior on malloc failures.
The problem might lie in your versions of ft_len_word or ft_nb_words. You should post a full program exhibiting the problem for a proper investigation.
Here is a modified version that does not use these functions:
#include <stdlib.h>
/* Return 1 if c is a space, tab or newline, and 0 otherwise. */
int ft_is_space(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
/*
 * Split str at spaces, tabs and newlines (as classified by ft_is_space)
 * into a NULL-terminated array of newly allocated, NUL-terminated words.
 *
 * Returns NULL if any allocation fails; memory allocated up to that point
 * is freed first. The caller frees each word and then the array.
 */
char **ft_split_whitespaces(const char *str) {
    int i, j, k, len, in_space, nb_words;
    char **tab;

    /* count the words: one per transition from separator to non-separator */
    nb_words = 0;
    in_space = 1;
    for (i = 0; str[i]; i++) {
        if (ft_is_space(str[i])) {
            in_space = 1;
        } else {
            nb_words += in_space;
            in_space = 0;
        }
    }
    tab = malloc(sizeof(*tab) * (nb_words + 1));
    if (tab != NULL) {
        i = 0;
        j = 0;
        while (str[i]) {
            /* skip the separators in front of the next word */
            while (ft_is_space(str[i]))
                i++;
            if (str[i]) {
                /* str[i] starts a word: measure it */
                for (len = 1; str[i + len] && !ft_is_space(str[i + len]); len++)
                    continue;
                if ((tab[j] = malloc(sizeof(*tab[j]) * (len + 1))) == NULL) {
                    /* undo the partial result before reporting failure */
                    while (j > 0)
                        free(tab[--j]);
                    free(tab);
                    return NULL;
                }
                for (k = 0; k < len; k++)
                    tab[j][k] = str[i + k];
                tab[j++][len] = '\0';
                i += len;
            }
        }
        /* NULL marks the end of the array */
        tab[j] = NULL;
    }
    return tab;
}
You need to implement your version of strtok() if you do not want to use the library function or need a different functionality than the one provided by strtok().
Below is a simple string tokenizer, which, unlike the standard library's strtok(), still returns a value in case of consecutive delimiters. I used this function to parse CSV files, which sometimes include empty cells, hence consecutive , characters. Standard library's strtok() did not work for me, so I had to implement my own function.
I used other helper functions, which are now part of a simple string library I maintain on GitHub, called zString.
Below is how it behaves
Example Usage
/* The array is named str, so the first call must pass str; later calls pass NULL to continue. */
char str[] = "A,B,,,C";
printf("1 %s\n",zstring_strtok(str,","));
printf("2 %s\n",zstring_strtok(NULL,","));
printf("3 %s\n",zstring_strtok(NULL,","));
printf("4 %s\n",zstring_strtok(NULL,","));
printf("5 %s\n",zstring_strtok(NULL,","));
printf("6 %s\n",zstring_strtok(NULL,","));
Example Output
1 A
2 B
3 ,
4 ,
5 C
6 (null)
and the code
/*
 * Tokenizer that, unlike strtok(), still reports consecutive delimiters.
 *
 * Only the first character of delim is used as the separator. Pass the
 * string on the first call and 0 (NULL) afterwards to continue.
 * Returns:
 *   - the next token (its terminating separator is overwritten with '\0');
 *   - delim itself when the remaining text starts with the separator;
 *   - the remaining text when it contains no separator (this ends the scan);
 *   - 0 when delim is 0, or when there is nothing left to continue with.
 * The scan position is kept in a static variable.
 */
char *zstring_strtok(char *str, const char *delim) {
    static char *resume = 0;    /* where the next call continues */
    char *cursor;
    char sep;

    if (delim == 0)
        return 0;

    if (str == 0) {
        /* continuation call: nothing saved means nothing left */
        if (resume == 0)
            return 0;
        str = resume;
    }

    /* walk to the first separator or to the end of the text */
    sep = delim[0];
    cursor = str;
    while (*cursor != '\0' && *cursor != sep)
        cursor++;

    /* no separator left: hand back the remainder and end the scan */
    if (*cursor == '\0') {
        resume = 0;
        return str;
    }

    /* text starts with the separator: report the delimiter itself */
    if (cursor == str) {
        resume = str + 1;
        return (char *)delim;
    }

    /* cut the token off and remember where the rest begins;
       this write requires str to be modifiable */
    *cursor = '\0';
    resume = cursor + 1;
    return str;
}

String tokenizer without using strtok()

I'm in the process of writing a string tokenizer without using strtok(). This is mainly for my own betterment and for a greater understanding of pointers. I think I almost have it, but I've been receiving the following errors:
myToc.c:25 warning: assignment makes integer from pointer without a cast
myToc.c:35 (same as above)
myToc.c:44 error: invalid type argument of 'unary *' (have 'int')
What I'm doing is looping through the string sent to the method, finding each delimiter, and replacing it with '\0.' The "ptr" array is supposed to have pointers to the separated substrings. This is what I have so far.
#include <string.h>
/*
 * myToc (as posted in the question): replaces the spaces in str with '\0'
 * and records where each word starts, then prints the words.
 * NOTE(review): ptr is declared as int* and sized with sizeof(char), yet it
 * is assigned char addresses; the quoted compiler diagnostics refer to the
 * lines marked "Line 25", "Line 35" and "Line 44" below.
 */
void myToc(char * str){
int spcCount = 0;
int ptrIndex = 0;
int n = strlen(str);
/* count spaces that directly follow a non-space character */
for(int i = 0; i < n; i++){
if(i != 0 && str[i] == ' ' && str[i-1] != ' '){
spcCount++;
}
}
//Pointer array; +1 for \0 character, +1 for one word more than number of spaces
int *ptr = (int *) calloc(spcCount+2, sizeof(char));
ptr[spcCount+1] = '\0';
//Used to differentiate separating spaces from unnecessary ones
char temp;
for(int j = 0; j < n; j++){
if(j == 0){
/*Line 25*/ ptr[ptrIndex] = &str[j];
temp = str[j];
ptrIndex++;
}
else{
if(str[j] == ' '){
temp = str[j];
str[j] = '\0';
}
else if(str[j] != ' ' && str[j] != '\0' && temp == ' '){
/*Line 35*/ ptr[ptrIndex] = &str[j];
temp = str[j];
ptrIndex++;
}
}
}
int k = 0;
while(ptr[k] != '\0'){
/*Line 44*/ printf("%s \n", *ptr[k]);
k++;
}
}
I can see where the errors are occurring but I'm not sure how to correct them. What should I do? Am I allocating memory correctly or is it just an issue with how I'm specifying the addresses?
Your pointer array is wrong. It looks like you want:
char **ptr = calloc(spcCount+2, sizeof(char*));
Also, if I am reading your code correctly, there is no need for the null byte as this array is not a string.
In addition, you'll need to fix:
while(ptr[k] != '\0'){
/*Line 44*/ printf("%s \n", *ptr[k]);
k++;
}
The dereference is not required and if you remove the null ptr, this should work:
for ( k = 0; k < ptrIndex; k++ ){
/*Line 44*/ printf("%s \n", ptr[k]);
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Tokenize str in place at space characters and print every token on its
 * own line (format "%s \n").
 *
 * Every space in str is overwritten with '\0'. The token start addresses
 * are collected in a temporary NULL-terminated pointer array that is freed
 * before returning. If that array cannot be allocated, the function returns
 * without touching str.
 */
void myToc(char * str){
    int spcCount = 0;
    int ptrIndex = 0;
    int n = strlen(str);

    /* count spaces that directly follow a non-space: there can be at most
       one more word than that */
    for(int i = 1; i < n; i++){
        if(str[i] == ' ' && str[i-1] != ' '){
            spcCount++;
        }
    }

    /* +1 for the extra word, +1 for the NULL terminator left by calloc */
    char **ptr = calloc(spcCount+2, sizeof(char*));
    if(ptr == NULL){
        return;
    }

    /* in_gap is non-zero while the previous character was a space (or we
       are at the start), i.e. the next non-space starts a token */
    int in_gap = 1;
    for(int j = 0; j < n; j++){
        if(str[j] == ' '){
            in_gap = 1;
            str[j] = '\0';
        } else if(in_gap){
            ptr[ptrIndex++] = &str[j];
            in_gap = 0;
        }
    }

    int k = 0;
    while(ptr[k] != NULL){
        printf("%s \n", ptr[k++]);
    }
    free(ptr);
}
/* Demo: run myToc on two modifiable sample strings. */
int main(){
    char samples[][12] = { "a b c", "hello world" };

    for (int which = 0; which < 2; which++) {
        myToc(samples[which]);
    }
    return 0;
}
Update: I tried this at http://www.compileonline.com/compile_c99_online.php
with the fixes for lines 25, 35, and 44, and with a main function that called
myToc() twice. I initially encountered segfaults when trying to write null characters
to str[], but that was only because the strings I was passing were (apparently
non-modifiable) literals. The code below worked as desired when I allocated a text buffer and wrote the strings there before passing them in. This version also could be modified to return the array of pointers, which then would point to the tokens.
(The code below also works even when the string parameter is non-modifiable, as long as
myToc() makes a local copy of the string; but that would not have the desired effect if the purpose of the function is to return the list of tokens rather than just print them.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Tokenize str in place at space characters and print every token on its
 * own line (format "%s \n").
 *
 * Every space in str is overwritten with '\0'; a token starts at each
 * non-space character that is the first character or follows a space, so
 * leading spaces do not produce a bogus first token. The temporary pointer
 * array is freed before returning. If it cannot be allocated, the function
 * returns without touching str.
 */
void myToc(char * str){
    int spcCount = 0;
    int ptrIndex = 0;
    int n = strlen(str);

    /* count spaces that directly follow a non-space */
    for(int i = 1; i < n; i++){
        if(str[i] == ' ' && str[i-1] != ' '){
            spcCount++;
        }
    }

    //Pointer array; +1 for one word more than number of spaces
    char** ptr = (char**) calloc(spcCount+2, sizeof(char*));
    if(ptr == NULL){
        return;
    }

    //Non-zero while the previous character was a space (or at the start)
    int after_space = 1;
    for(int j = 0; j < n; j++){
        if(str[j] == ' '){
            after_space = 1;
            str[j] = '\0';
        }
        else if(after_space){
            ptr[ptrIndex] = &str[j];
            ptrIndex++;
            after_space = 0;
        }
    }

    for (int k = 0; k < ptrIndex; ++k){
        printf("%s \n", ptr[k]);
    }
    /* the pointers refer into str, so only the array itself is released */
    free(ptr);
}
/*
 * Demo: copy each sample into a writable buffer (myToc modifies its
 * argument) and tokenize it, printing a separator line between the runs.
 */
int main (int n, char** v)
{
    const char *samples[] = { "a b c", "hello world" };
    char text[256];

    for (int which = 0; which < 2; ++which) {
        if (which > 0)
            printf("-----\n");
        strcpy(text, samples[which]);
        myToc(text);
    }
    return 0;
}
I would prefer simpler code, however. Basically you want a pointer to the first non-blank character in str[], then a pointer to each non-blank (other than the first) that is preceded by a blank. Your first loop almost gets this idea except it is looking for blanks preceded by non-blanks. (Also you could start that loop at i = 1 and avoid having to test i != 0 on each iteration.)
I might just allocate an array of char* of size sizeof(char*) * (n + 1)/2 to hold the pointers rather than looping over the string twice (that is, I'd omit the first loop, which is just to figure out the size of the array). In any case, if ptr[0] is non-blank I would write its address to the array; then looping for (int j = 1; j < n; ++j), write the address of str[j] to the array if str[j] is non-blank and str[j - 1] is blank--basically what you are doing, but with fewer ifs and fewer auxiliary variables.
Less code means less opportunity to introduce a bug, as long as the code is clean and makes sense.
Previous remarks:
int *ptr = declares an array of int. For an array of pointers to char, you want
char** ptr = (char**) calloc(spcCount+2, sizeof(char*));
The comment prior to that line also seems to indicate some confusion. There is no terminating null in your array of pointers, and you don't need to allocate space for one, so possibly spcCount+2 could be spcCount + 1.
This also is suspect:
while(ptr[k] != '\0')
It looks like it would work, given the way you used calloc (you do need spcCount+2 to make this work), but I would feel more secure writing something like this:
for (k = 0; k < ptrIndex; ++k)
I do not think that is what caused the segfault; it just makes me a little uneasy to compare a pointer (ptr[k]) with \0 (which you would normally compare against a char).

Resources