C - Counting words in a string - c

I've been trying to write a function that counts the number of words in a string in C. However, in some cases (such as the one in the example) it should return 0 and not 1... any ideas of what could be wrong?
#import <stdio.h>
int contaPal(char s[]) {
int r;
int i;
r = 0;
for (i = 0; s[i] != '\0'; i++) {
if (s[i] == '\n')
r = r + 0;
if (s[i] != ' ' && s[i + 1] == ' ' && s[i + 1] != '\0')
r++;
if (s[i] != ' ' && s[i + 1] == '\0') {
r++;
}
}
return r;
}
/* Runs contaPal on a buffer holding a space and a newline and prints the count. */
int main () {
    char s[15] = " \n"; /* remaining bytes are zero-filled */
    int words = contaPal(s);

    printf("Words: %d \n", words);
    return 0;
}

You should not treat '\n' differently from any other whitespace character.
Here is a simpler version:
#include <ctype.h>
#include <stdio.h>
/*
 * Count the words in s, where a word is a maximal run of characters for
 * which isspace() is false.  Returns 0 for an empty or all-whitespace string.
 */
int contaPal(const char *s) {
    int count = 0;
    int hassep = 1; /* 1 at the start and after any whitespace character */

    while (*s) {
        /* Cast first: <ctype.h> functions need a value in unsigned char range. */
        if (isspace((unsigned char)*s)) {
            hassep = 1;
        } else {
            count += hassep; /* first character after a separator opens a word */
            hassep = 0;
        }
        s++;
    }
    return count;
}
/* Prints the word count of a string made of one space and one newline. */
int main(void) {
    const char text[] = " \n";
    int words = contaPal(text);

    printf("Words: %d\n", words);
    return 0;
}

I suppose that the word is any sequence of characters excluding white space characters.
Your function returns 1 because for the supplied string when the new line character is encountered the variable r is increased due to this condition
if (s[i] != ' ' && s[i + 1] == '\0') {
r++;
}
So the function implementation is wrong.
It can be defined the following way as it is shown in the demonstrative program
#include <stdio.h>
#include <ctype.h>
/*
 * Returns the number of words in s; a word is a run of characters for
 * which isspace() is false.
 */
size_t contaPal( const char s[] )
{
    size_t words = 0;
    const char *p = s;

    for ( ;; )
    {
        /* Move to the start of the next word, if there is one. */
        while ( isspace( ( unsigned char )*p ) ) p++;
        if ( *p == '\0' ) break;

        words++;

        /* Consume the rest of that word. */
        do
        {
            p++;
        } while ( *p != '\0' && !isspace( ( unsigned char )*p ) );
    }

    return words;
}
/* Prints the word count of a string made of one space and one newline. */
int main(void)
{
    char s[] = " \n";
    size_t words = contaPal( s );

    printf( "Words: %zu\n", words );
    return 0;
}
Its output as you expect is
Words: 0

A simple illustration using existing character test functions:
/*
 * Counts the words in a fixed sample sentence.  A word is a run containing
 * at least one alphanumeric character; punctuation or whitespace ends it.
 * The total is left in numWords (this illustration does not print it).
 */
int main(void)
{
    int numWords = 0;
    int inWord = 0; /* 1 while inside a word that has not been counted yet */
    const char *sentence = "This is a sentence, too long.";
    //const char *sentence2 = " ";//tested for empty string also

    while (*sentence != '\0')
    {
        /* <ctype.h> functions need a value in unsigned char range. */
        unsigned char ch = (unsigned char)*sentence;

        if (isalnum(ch))
        {
            inWord = 1;
        }
        else if (inWord && (ispunct(ch) || isspace(ch)))
        {
            /* First delimiter after a word: count it once; later delimiters
               in the same run are ignored because inWord is now 0. */
            numWords++;
            inWord = 0;
        }
        sentence++;
    }
    /* A word that runs up to the terminator has no delimiter to trigger the count. */
    if (inWord)
    {
        numWords++;
    }
    return 0;
}

The line:
if (s[i] != ' ' && s[i + 1] == ' ' && s[i + 1] != '\0')
r++;
Exactly matches the case when you look on '\n'.
You should use if ... else if ....

Related

Capitalize every word in a string when there are multiple white spaces

I am trying to capitalize every word in and here is my code:
/*
 * Question's attempt: upper-cases the first character of str, and the
 * character right after each ' ' or '.', when it is a lowercase ASCII
 * letter.  Modifies str in place and returns it.
 */
char *cap_string(char *str)
{
int i;
for (i = 0; str[i] != '\0'; i++)
{
if (i == 0)
{
if (str[i] >= 'a' && str[i] <= 'z')
str[i] -= 32;
continue;
}
if (str[i] == ' ')
{
/* Steps onto the character after the space.  If that is another space,
   the loop's own i++ then moves past it, so the letter that follows a run
   of spaces is never tested in this branch. */
/* NOTE: if the character after the space is '\0', the loop's i++ then
   steps beyond the terminator. */
++i;
if (str[i] >= 'a' && str[i] <= 'z')
{
str[i] -= 32;
continue;
}
}
else
{
if (str[i] == '.')
{
/* Same look-ahead as for ' ', with the same limitations. */
++i;
if (str[i] >= 'a' && str[i] <= 'z')
{
str[i] -= 32;
continue;
}
}
}
}
return (str);
}
My question is that my code works fine in most cases, but does not function properly if it encounters multiple white spaces. How can I capitalize a word preceded by multiple white spaces?
Change your code to the following:-
/*
 * Upper-cases, in place, every lowercase ASCII letter that is the first
 * character of str or directly follows ' ', '.' or '\n'.  Returns str.
 */
char *cap_string(char *str)
{
    char before = ' '; /* stands in for "nothing precedes index 0" */
    char *cur;

    for (cur = str; *cur != '\0'; cur++)
    {
        int after_sep = (before == ' ' || before == '.' || before == '\n');

        if (after_sep && *cur >= 'a' && *cur <= 'z')
            *cur -= 32;

        before = *cur;
    }
    return (str);
}
Testing all cases, using the following code,
#include <stdio.h>
/* Prints the result of cap_string on a fixed sample (no trailing newline). */
int main()
{
    char text[] = "hello world.hello";
    char *result = cap_string(text);

    printf("%s", result);
    return 0;
}
returns
Hello World.Hello
I have tried to keep your logic intact and not use any string.h library functions.
You have to keep in mind that the other conditions after the || operator are not checked if the first condition is evaluated as true. So str[-1] never occurs.
The main idea is look at the previous letter to see if the current letter has to upper case. Introduced a constant UPCASE_AFTER so it's easy to add other punctuation marks (say, '!', '?'). Added test case. Refactored for readability.
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* Characters after which the next character is upper-cased. */
#define UPCASE_AFTER " ."
/*
 * Upper-cases, in place, the first character of str and every character
 * that directly follows one of the characters in UPCASE_AFTER.
 * Returns str.
 */
char *cap_string(char *str) {
    for (size_t i = 0; str[i]; i++) {
        if (i == 0 || strchr(UPCASE_AFTER, str[i - 1])) {
            /* toupper() needs a value in unsigned char range. */
            str[i] = (char)toupper((unsigned char)str[i]);
        }
    }
    return str;
}
/* Prints cap_string applied to a fixed test string. */
int main() {
    char sample[] = "a bb c.d.ee..f.";

    printf("%s\n", cap_string(sample));
    return 0;
}
and it returns:
A Bb C.D.Ee..F.
Here's another alternative:
#include <stdio.h>
// Use standard library routines
// like 'isalpha()', 'toupper'...
// Not all character sets have ASCII's contiguous alphabets.
#include <ctype.h>
/*
 * Upper-cases, in place, the first letter of every run of alphabetic
 * characters when that letter is lowercase.  Returns str.
 */
char *cap_string( char *str ) {
    int hunting = 1; /* 'state flag' indicating "hunting for lowercase letter" */

    for( char *cp = str; *cp; cp++ ) {
        /* <ctype.h> functions need a value in unsigned char range. */
        unsigned char ch = (unsigned char)*cp;

        if( hunting && islower( ch ) ) {
            *cp = (char)toupper( ch );
            hunting = 0;
        }
        else
            hunting = !isalpha( ch ); /* any non-letter restarts the hunt */
    }
    return str;
}
/* Demonstrates cap_string on a fixed sentence and prints the result. */
int my_main() {
    char sentence[] = "what? now is the time (we say sometimes) to learn C.";
    char *capped = cap_string( sentence );

    printf( "%s\n", capped );
    return 0;
}
Output
What? Now Is The Time (We Say Sometimes) To Learn C.

How can i make first letter of all words in a string uppercase recursively? [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 3 years ago.
Improve this question
I wonder how can i make this. I don't want any iteration or isalpha() and toupper() functions. We want to use our own code to read input. We assume that user enters all characters lowercase. This is what i've tried so far:
define MAX_LENGTH 100
int read_line(char str[],int);
void capitalize(char[]);
/* Prompts for a line, passes it to capitalize(), and prints the buffer. */
int main (void){
    char text[MAX_LENGTH];

    printf("Enter a text \n");
    read_line(text, MAX_LENGTH);
    capitalize(text);
    puts(text);
    return 0;
}
/*
 * Reads characters from stdin up to (not including) the first '\n',
 * stores them in str, NUL-terminates it and returns the number stored.
 * The parameter n is not used; the limit is the MAX_LENGTH macro.
 */
int read_line(char str[],int n){
int ch,i=0;
/* NOTE: EOF is not tested, so this loop relies on a '\n' arriving. */
while ((ch = getchar()) != '\n'){
if(i<MAX_LENGTH)
str[i++] = ch;
}
/* i can have reached MAX_LENGTH here, which is one past the last valid
   index of a MAX_LENGTH-element array. */
str[i] = '\0';
return i;
}
/*
 * Question's recursive attempt at upper-casing the first letter of each
 * word by subtracting 32 from it.
 */
void capitalize(char str[]){
/* x is computed but never used. */
int x = strlen(str);
/* static: one shared counter that keeps its value across recursive calls
   and across later top-level calls. */
static int i = 0;
if(str[i]=='\0')
return;
/* str[i-1] is evaluated before the i == 0 test, so the first call reads str[-1]. */
if(str[i-1]==' '|| i == 0)
str[i] -= 32;
i++;
/* Both the pointer and the index advance: the callee indexes &str[i] with
   the same i, so positions are skipped. */
return capitalize(&str[i]);
}
Output is like this:
Enter a text:
asdf sdf df
Asdf sdf df
What is wrong with my code?
You code is invalid. For example even the function read_line
/* Quoted from the question: reads up to the first '\n' into str; n is unused. */
int read_line(char str[],int n){
int ch,i=0;
/* NOTE: EOF is not tested, so this loop relies on a '\n' arriving. */
while ((ch = getchar()) != '\n'){
if(i<MAX_LENGTH)
str[i++] = ch;
}
/* i can have reached MAX_LENGTH here, one past the last valid index of a
   MAX_LENGTH-element array. */
str[i] = '\0';
return i;
}
is incorrect, at least because once i has reached MAX_LENGTH the following statement writes beyond the end of the character array
str[i] = '\0';
That is the condition of the loop is initially written incorrectly.
As for the recursive function then it can look for example the following way.
#include <stdio.h>
/* Returns non-zero when c is a space or a horizontal tab. */
int is_blank( char c )
{
    return c == ' ' || c == '\t';
}
/*
 * Recursively upper-cases the first character of every blank-separated
 * word of s when it is a lowercase ASCII letter, and lower-cases ASCII
 * letters inside a word.  Other characters are left untouched.
 * Modifies s in place and returns it.
 */
char * capitalize( char *s )
{
    if ( *s )
    {
        int blank = is_blank( *s );
        /* Treat *s as a word start for now; only letters may have bit 0x20
           cleared, otherwise '.', digits etc. would be turned into other bytes. */
        if ( !blank && *s >= 'a' && *s <= 'z' )
        {
            *s &= ~' ';
        }
        capitalize( s + 1 );
        /* The recursive call upper-cased s[1] as if it began a word; undo
           that when s[1] actually continues the word that contains *s. */
        if ( !blank && *( s + 1 ) >= 'A' && *( s + 1 ) <= 'Z' )
        {
            *( s + 1 ) |= ' ';
        }
    }
    return s;
}
/* Prints a sample sentence before and after capitalize(). */
int main(void)
{
    char text[] = "hello everybody. how do you do?";

    puts( text );
    capitalize( text ); /* works in place and returns its argument */
    puts( text );
    return 0;
}
The program output is
hello everybody. how do you do?
Hello Everybody. How Do You Do?
Here is a similar demonstrative program but with your fixed function read_line.
#include <stdio.h>
#define MAX_LENGTH 100
/* Returns non-zero when c is a space or a horizontal tab. */
int is_blank( char c )
{
    return c == ' ' || c == '\t';
}
/*
 * Recursively upper-cases the first character of every blank-separated
 * word of s when it is a lowercase ASCII letter, and lower-cases ASCII
 * letters inside a word.  Other characters are left untouched.
 * Modifies s in place and returns it.
 */
char * capitalize( char *s )
{
    if ( *s )
    {
        int blank = is_blank( *s );
        /* Treat *s as a word start for now; only letters may have bit 0x20
           cleared, otherwise '.', digits etc. would be turned into other bytes. */
        if ( !blank && *s >= 'a' && *s <= 'z' )
        {
            *s &= ~' ';
        }
        capitalize( s + 1 );
        /* The recursive call upper-cased s[1] as if it began a word; undo
           that when s[1] actually continues the word that contains *s. */
        if ( !blank && *( s + 1 ) >= 'A' && *( s + 1 ) <= 'Z' )
        {
            *( s + 1 ) |= ' ';
        }
    }
    return s;
}
/*
 * Reads characters from stdin into s until a '\n', EOF, or n - 1
 * characters have been stored, then NUL-terminates s.
 * s must have room for n bytes.  Returns the number of characters stored
 * (excluding the terminator); when n is 0 nothing is read or written.
 */
size_t read_line( char *s, size_t n )
{
    int ch;
    size_t i = 0;

    /* A zero-sized buffer has no room even for the terminator. */
    if ( n == 0 )
    {
        return 0;
    }

    while ( i + 1 < n && ( ch = getchar() ) != EOF && ch != '\n' )
    {
        s[i++] = ( char )ch;
    }
    s[i] = '\0';
    return i;
}
/* Reads one line, echoes it, then prints it again after capitalize(). */
int main(void)
{
    char line[MAX_LENGTH];

    read_line( line, sizeof line );
    puts( line );
    capitalize( line ); /* works in place and returns its argument */
    puts( line );
    return 0;
}
If to enter the string
hello everybody. how do you do?
then the program output will be the same as shown above.
hello everybody. how do you do?
Hello Everybody. How Do You Do?
If the bit-wise operations are unclear to you, you can substitute this statement
*s &= ~' ';
for this statement
*s -= 'a' - 'A';
and this statement
*( s + 1 ) |= ' ';
for this statement
*( s + 1 ) += 'a' - 'A';
If you want to keep your approach of implementing the recursive function with a static variable, you will probably be interested in why your function does not work.
Let's consider it ignoring the first statement with the call of strlen.
/* Quoted from the question for discussion. */
void capitalize(char str[]){
/* x is computed but never used. */
int x = strlen(str);
/* static: one shared counter that keeps its value across recursive calls
   and across later top-level calls. */
static int i = 0;
if(str[i]=='\0')
return;
/* str[i-1] is evaluated before the i == 0 test, so the first call reads str[-1]. */
if(str[i-1]==' '|| i == 0)
str[i] -= 32;
i++;
/* Both the pointer and the index advance: the callee indexes &str[i] with
   the same i, so positions are skipped. */
return capitalize(&str[i]);
}
First of all after the first call the function for one string you may not call it a second time for another string because the static variable i will not be already equal to 0.
The condition of the if statement should be written at least like
if ( i == 0 || str[i-1]==' ' )
that is the order of sub-expressions shall be changed.
The return statement shall not contain an expression
return capitalize(&str[i]);
you could just write
capitalize(&str[i]);
Nevertheless the initial value of the pointer str was changed. However within the function you are using the index i relative the initial value of str of the first call of the function.
And I am sure it is interesting to you how correctly to rewrite the function, is not it?
The function can look the following way as it is shown in the demonstrative program below.
#include <stdio.h>
#define MAX_LENGTH 100
/* Returns non-zero when c is a space or a horizontal tab. */
int is_blank( char c )
{
    return c == ' ' || c == '\t';
}
/*
 * Recursively upper-cases, in place, the first character of every
 * blank-separated word of s when it is a lowercase ASCII letter.
 * Uses a static index as the recursion cursor, so it is not reentrant.
 * Returns s.
 */
char * capitalize( char *s )
{
    static size_t i = 0; /* current position; restored to its entry value before returning */

    if ( *( s + i ) )
    {
        /* Only lowercase letters are shifted; subtracting from any other
           byte ('.', a digit, an uppercase letter) would corrupt it. */
        if ( s[i] >= 'a' && s[i] <= 'z' && ( i == 0 || is_blank( s[i-1] ) ) )
        {
            s[i] -= 'a' - 'A';
        }
        ++i;
        capitalize( s );
        --i;
    }
    return s;
}
/*
 * Reads characters from stdin into s until a '\n', EOF, or n - 1
 * characters have been stored, then NUL-terminates s.
 * s must have room for n bytes.  Returns the number of characters stored
 * (excluding the terminator); when n is 0 nothing is read or written.
 */
size_t read_line( char *s, size_t n )
{
    int ch;
    size_t i = 0;

    /* A zero-sized buffer has no room even for the terminator. */
    if ( n == 0 )
    {
        return 0;
    }

    while ( i + 1 < n && ( ch = getchar() ) != EOF && ch != '\n' )
    {
        s[i++] = ( char )ch;
    }
    s[i] = '\0';
    return i;
}
/* Reads one line, echoes it, then prints it again after capitalize(). */
int main(void)
{
    char line[MAX_LENGTH];

    read_line( line, sizeof line );
    puts( line );
    capitalize( line ); /* works in place and returns its argument */
    puts( line );
    return 0;
}
I would write the following:
/*
 * Recursively converts every character of str to upper case in place
 * (note: every character, not only the first letter of each word).
 */
void capitalize(char* str) {
    if (! *str) return; // job done
    /* toupper() needs a value in unsigned char range. */
    *str = (char)toupper((unsigned char)*str); // capitalize "correctly"
    capitalize(str + 1);
}
the main problem is that you're using index and recursion at the same time, simple recursive solution would be:
/*
 * Recursively walks str; a lowercase Latin letter is upper-cased when
 * `start` is true for it.  `start` is true for the character that follows
 * a ' ' (and for the first character if the caller passes true).
 */
void capitalize(char* str, bool start){
    if (*str == '\0') {
        return; // reached the terminator
    }

    bool lower = (*str >= 'a' && *str <= 'z');
    if (start && lower) {
        *str = 'A' + (*str - 'a');
    }

    bool next_starts_word = (*str == ' ');
    capitalize(str + 1, next_starts_word);
}
should be called as capitalize(str, true)
Should do the job:
/*
 * Upper-cases, in place, the first character of every space-separated
 * word of s when it is a lowercase ASCII letter.
 */
void capitalize(char *s)
{
    int in_word = 0; /* 1 once the first character of the current word was handled */

    for (; *s != '\0'; s++) {
        if (*s == ' ') {
            in_word = 0;
            continue;
        }
        if (!in_word) {
            if (*s >= 'a' && *s <= 'z')
                *s -= 32;
            in_word = 1;
        }
    }
}
(I call this "Stupid Character Processing")

How to Tokenize string[array]?

I need to tokenize a string from an array, i need just three words and ignore all tabs '\t' and spaces ' '
the array line[] is just a test case.
I debugged mine, the first array (supposed to carry only the first word) got filled by spaces & letters from 3 words, not stopping after the first word when a tab or space is met. BTW my program crashed. i suspect getting out of array bounds maybe.
What am I doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Question's attempt to split `line` into three words (array1..array3),
 * treating ' ' and '\t' as separators, and print them.
 */
int main()
{
char line[] = " CLOOP LDA buffer,x";
char array1[20] ="";
char array2[20] ="";
char array3[20] ="";
int i = 0;
for( i ; i<strlen(line) ; i++)
{
/* None of the while conditions below test for '\0'; the terminator is
   neither ' ' nor '\t', so scanning continues past the end of line. */
while ( (line[i] != ' ') && (line[i] != '\t'))
{
/* array1 is filled only while its first byte is still '\0'. */
if(array1[0] == '\0')
{
int j = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array1[j] = line[i];
i++;
j++;
}
}
/* The ';' ends this if statement, so the block below always runs. */
if(array2[0] =='\0');
{
int k = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array2[k] = line[i];
i++;
k++;
}
}
if(array3[0] == '\0')
{
int g = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array3[g] = line[i];
i++;
g++;
}
}
}
}
printf("array 1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);
return(0);
}
You are over-complicating things. First of all it is difficult to feed all 3 arrays at the same time. The processing for one token should be completely finished before moving to the other token.
I would propose to "eat" all the white spaces before starting to process a token.
That is done by:
// skip leading delimiters
if( skip_leading_delimiters )
{
if( is_delimiter( delimiters, line[i]) ) continue;
skip_leading_delimiters = 0;
}
After a token has been processed you can move on to the next token and process it. I tried to preserve your concept and approach as much as I could. The number of while loops has been reduced to 0, since the "// skip leading delimiters" section takes care of that.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns 1 when c is one of the characters in the NUL-terminated string
 * `delimiters`, otherwise 0.  The terminator '\0' is never a delimiter.
 */
int is_delimiter(const char * delimiters, char c) // check for a delimiter
{
    /* strchr() treats the terminating '\0' as part of the string, so a
       search for '\0' would always succeed; exclude it explicitly. */
    if (c == '\0') return 0;
    return strchr(delimiters, c) != NULL;
}
/*
 * Splits `line` into its first three tokens, using space and tab as
 * delimiters, and prints them.  Tokens that are missing print as empty
 * strings; tokens longer than the buffers are truncated.
 */
int main()
{
    char line[] = " CLOOP LDA buffer,x";
    /* Start empty so that a line with fewer than three tokens still prints
       valid strings. */
    char array1[20] = "";
    char array2[20] = "";
    char array3[20] = "";
    char *tokens[] = { array1, array2, array3 };
    const size_t token_count = sizeof tokens / sizeof tokens[0];
    const size_t capacity = sizeof array1;
    const char *delimiters = " \t"; /* space and tab */
    size_t token = 0;   /* index of the token being filled */
    size_t j = 0;       /* write position inside that token */
    int in_token = 0;   /* 1 once the current token has received a character */

    for (size_t i = 0; line[i] != '\0' && token < token_count; i++)
    {
        if (is_delimiter(delimiters, line[i]))
        {
            /* The first delimiter after a token closes it; leading and
               repeated delimiters are skipped. */
            if (in_token)
            {
                token++;
                j = 0;
                in_token = 0;
            }
            continue;
        }
        in_token = 1;
        /* Keep one byte for the terminator. */
        if (j + 1 < capacity)
        {
            tokens[token][j++] = line[i];
            tokens[token][j] = '\0';
        }
    }
    printf(" array1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);
    return(0);
}
Output:
array1: CLOOP
array2: LDA
array3: buffer,x

How to check if particular characters appeared in same order?

For example I have a string: PO-ELK=SAEER:SWE
- must always be before = and that should always be before :.
Is there a easy way I could check to see if those conditions are met in the string I input and if not an error is returned.
Just a little programming needed.
/* symbol walks the required characters in order; it advances each time the
   scan over s reaches the character currently expected. */
const char *symbol = "-=:";
const char *s = "PO-ELK=SAEER:SWE";
while (*s) {
if (*s == *symbol) symbol++;
s++;
}
/* symbol points at the terminating '\0' only if all three were matched in order. */
if (*symbol) puts("Fail");
else puts("Success");
How about 3 calls to strchr()?
const char *s = "PO-ELK=SAEER:SWE";
const char *t;
/* Each strchr() starts at the previous match, and && stops at the first
   NULL, so this succeeds only when a '-' is followed later by an '=' and
   then by a ':'. */
if ((t = strchr(s, '-')) && (t = strchr(t, '=')) && (t = strchr(t, ':'))) puts("Success");
else puts("Failure");
Sort-of interesting problem, it might be good for code-golf
$ cat > yesno.c
#include <stdio.h>
#include <strings.h>
/*
 * Prints "yes" when the '-', '=' and ':' characters of av[1] start with
 * exactly the sequence "-=:", and "no" otherwise (including when av[1]
 * is absent).
 */
int main(int ac, char **av) {
    const char *order = "-=:";
    const char *expected = order;   /* next symbol that must appear */
    const char *in = av[1];

    for (; *expected && in && *in; ++in) {
        if (!index(order, *in))
            continue;               /* not one of the tracked symbols */
        if (*in != *expected)
            break;                  /* tracked symbol, but out of order */
        ++expected;
    }
    printf("%s\n", *expected ? "no" : "yes");
}
^D
$ cc -Wall yesno.c
$ ./a.out PO-ELK=SAEER:SWE
There are some grey areas in the spec tho. Are the characters ever duplicated? And if so do we search for a subsequence that is in-order? Do we require that all three are found? Does the program need to be interactive or can it just use the shell args?
Use std::string::find. Make use of second argument.
First look for "-" and if it could be find, search for "=" passing position of "-" as start point. Then do the same for ":" with position of "=" as a second argument.
You can use the strchr function, which return a pointer to the character in the string. So you can call this function for each of the characters in question and check that the indexes are in the right order.
const char *str = "PO-ELK=SAEER:SWE";
/* Position of the first occurrence of each character, or NULL if absent. */
const char *dashPtr = strchr(str,'-');
const char *equalPtr = strchr(str,'=');
const char *colonPtr = strchr(str,':');
/* The pointers must be checked for NULL before they are compared. */
if ((dashPtr == NULL) || (equalPtr == NULL) || (colonPtr == NULL)) {
    printf("one of -, =, : not found\n");
} else {
    /* All three point into str, so relational comparison gives their order. */
    if ((dashPtr < equalPtr) && (equalPtr < colonPtr)) {
        printf("order correct\n");
    } else {
        printf("order incorrect\n");
    }
}
Use state machine in this kind of problem :
Here is my solution, i didn't test it but it should give you some ideas
#include <string.h>
#include <stdio.h>
/* init: nothing seen; s1: '-' seen; s2: '-' then '=' seen;
   s3: '-', '=', ':' seen in order; end: a symbol appeared out of order. */
typedef enum {init, s1,s2,s3,end}eState;
/*
 * Scans a fixed string with a small state machine and reports whether
 * '-', '=' and ':' appear in that order.
 */
int main()
{
    char str[20] ="PO-ELK=SAEER:SWE";
    eState state = init;

    /* Stop at the terminator as well as on a final state, so a string that
       lacks one of the symbols cannot drive the scan past the array. */
    for (int i = 0; str[i] != '\0' && state != s3 && state != end; i++)
    {
        char c = str[i];
        switch (state)
        {
        case init:
            if (c =='-')
                state = s1;
            else if ((c =='=')||(c ==':'))
                state = end;
            break;
        case s1:
            if (c =='=')
                state = s2;
            else if(c ==':'||c=='-')
                state = end;
            break;
        case s2:
            if (c ==':')
                state = s3;
            else if(c =='='||c=='-')
                state = end;
            break;
        default :
            break;
        }
    }
    /* Anything other than s3 means the full sequence was not found. */
    if (state == s3)
        printf ("Succes \n");
    else
        printf ("Error \n");
    return 0;
}
You can use the idea shown in this demonstrative program. The advantage of this approach is that you can write just an if-statement to check a string.
#include <stdio.h>
#include <string.h>
/*
 * Returns non-zero when the first three characters of s that belong to
 * the set t are t[0], t[1] and t[2], in that order.
 */
static int symbols_in_order( const char *s, const char *t )
{
    size_t pos = strcspn( s, t );
    if ( s[pos] != t[0] ) return 0;

    pos += 1 + strcspn( s + pos + 1, t );
    if ( s[pos] != t[1] ) return 0;

    pos += 1 + strcspn( s + pos + 1, t );
    return s[pos] == t[2];
}
/* Checks two sample strings and prints whether each one is valid. */
int main( void )
{
    const char *samples[] = { "PO-ELK=SAEER:SWE", "PO-ELK:SAEER=SWE" };
    const char *t = "-=:";

    for ( size_t k = 0; k < sizeof samples / sizeof samples[0]; k++ )
    {
        const char *s = samples[k];

        if ( symbols_in_order( s, t ) )
        {
            printf( "\"%s\" is a valid string\n", s );
        }
        else
        {
            printf( "\"%s\" is not a valid string\n", s );
        }
    }
    return 0;
}
The program output
"PO-ELK=SAEER:SWE" is a valid string
"PO-ELK:SAEER=SWE" is not a valid string
To guarantee that the string does not contain more than one of each target character you can write the condition the following way
if ( s[n = strcspn( s + n, t )] == t[0] &&
s[n += 1 + strcspn( s + n + 1, t )] == t[1] &&
s[n += 1 + strcspn( s + n + 1, t )] == t[2] &&
s[n += 1 + strcspn( s + n + 1, t )] == '\0' )

Need to take multiple digit input from a string removing spaces and alphabets

I am writing a program where I am taking string as an input, here I need to remove spaces , ignore alphabets and use only numerals.
I am able to achieve removing spaces and alphabets, but i can only use single digits and not multiple digits.
Example:Input string:"adsf 12af 1 a123c 53c2m34n"
Here I need to use the input as "12 1 123 53234", as required for my application.
It will be great some one could share the logic or the sample code for the same.
Thanks in advance
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Splits str on ' ' and, for every piece that contains at least one
 * decimal digit, stores the number formed by that piece's digits (all
 * other characters are ignored) into array.
 *
 * On entry *size is the capacity of array; on return it holds the number
 * of values stored, which is also the return value.  Pieces beyond the
 * capacity are dropped.
 */
int pullOut(const char *str, int array[], int *size){
    const char *p = str;
    const int max = *size;
    int count = 0;

    while(*p != '\0'){
        int num = 0;
        int pull = 0; /* 1 once this piece has produced a digit */

        for(; *p != '\0' && *p != ' '; ++p){
            /* isdigit() needs a value in unsigned char range. */
            if(isdigit((unsigned char)*p)){
                num = num * 10 + (*p - '0');
                pull = 1;
            }
        }
        if(pull && count < max)
            array[count++] = num;
        while(*p == ' ')
            ++p;//skip space
    }
    return *size = count;
}
/* Extracts the numbers from a sample string and prints one per line. */
int main(void){
    char input[] = "adsf 12af 1 a123c 53c2m34n abc def";
    int arr[128] = { 0 };
    int arr_num = sizeof(arr)/sizeof(int); /* in: capacity, out: values found */

    pullOut(input, arr, &arr_num);

    for(int k = 0; k < arr_num; ++k){
        printf("%d\n", arr[k]);
    }
    return 0;
}
/* Copies only spaces and decimal digits from input to newString.
   input and newString are declared elsewhere; newString must be large
   enough for the copied characters plus the terminator. */
int i = 0;int j = 0;
while (input[i])
{
if (input[i] == ' ' || (input[i] >= '0' && input[i] <= '9'))
newString[j++] = input[i];
i++;
}
newString[j] = '\0';
If you do this, you'll copy only numerics and space into newString. I let you do all the memory alloc stuff.
here is your func:
/*
 * Writes to stdout every decimal digit of str, plus every space that is
 * not immediately followed by another space.  All other characters are
 * dropped.
 */
void remove_func(char *str)
{
    for (const char *p = str; *p != '\0'; ++p)
    {
        int is_digit = (*p >= '0' && *p <= '9');
        int last_space_of_run = (*p == ' ' && p[1] != ' ');

        if (is_digit || last_space_of_run)
            putchar(*p);
    }
}
What about this one:
/*
 * Copies the decimal digits of src to dst, writing a single ',' wherever
 * one or more spaces separate digit groups.  Non-digit, non-space
 * characters are dropped, and no leading or trailing ',' is produced.
 * dst must have room for strlen(src) + 1 bytes.  Returns dst.
 */
char * getNumbers(const char *src, char *dst)
{
    size_t j = 0;

    /* Walk to the terminator directly instead of calling strlen() on
       every iteration. */
    for (size_t i = 0; src[i] != '\0'; i++)
    {
        if (src[i] >= '0' && src[i] <= '9')
        {
            dst[j++] = src[i];
        }
        else if (j > 0 && src[i] == ' ' && dst[j-1] != ',')
        {
            // collapse a run of separators into one ','
            dst[j++] = ',';
        }
    }
    // remove the trailing ',' if exists
    if (j > 0 && dst[j-1] == ',') j--;
    dst[j] = '\0';
    return dst;
}
char src[] = "adsf 12af 1 a123c 53c2m34n";
/* Same size as src, so the result (never longer than src) always fits. */
char dst[sizeof(src)];
getNumbers(src, dst);
printf("'%s' -> '%s'\n", src, dst);
output:
'adsf 12af 1 a123c 53c2m34n' -> '12,1,123,53234'

Resources