Find most frequent letter in C - c

Here is my code to find the most frequent letter in an string. I need to store the most frequent letters in the most_freq string and they should be in alphabetical order too.
I don't know how to handle the situation where there are more letters with the same frequency and they are also the most frequent.
char text[200];
char most_freq[27];
int freq[26];
int i;
int counter;
int temp = 0;
int max_freq;
fgets(text, sizeof(text), stdin);
size_t len = strlen(text);
if (text[len - 1] == '\n') {
text[len - 1] = '\0';
len--;
}
for (i = 0; i < 26; i++) {
freq[i] = 0;
}
for (i = 0; i < len; i++) {
if (text[i] >= 'a' && text[i] <= 'z') {
counter = text[i] - 'a';
freq[counter]++;
}
}
int max = 0;
for (i = 0; i < 26; i++) {
if (freq[i] > temp) {
temp = freq[i];
max_freq = temp;
}
printf("%c occurs %d times.\n", i + 'a', freq[i]);
if (freq[i] > freq[max]) {
max = i;
}
}
printf("Highest frequency: %d \n", max_freq);
//printf("%c \n",max+'a');
sprintf(most_freq, "%c", max + 'a');
puts(most_freq);

#include <stdio.h>
#include <ctype.h>
#define SIZE 99
int main() {
char s[SIZE];
int freq[26] = {0}, i, max = 0;
fgets(s, SIZE, stdin);
for(i = 0; s[i]; i++) {
if( isalpha((unsigned char)s[i]) ) ++freq[s[i] - ( islower((unsigned char)s[i]) ? 'a' : 'A')];
}
for(i = 0; i < 26; i++) {
if( max < freq[i] ) max = freq[i];
}
for(i = 0; i < 26; i++) {
if( freq[i] == max ) putchar('a'+i);
}
return 0;
}

I've done it by first calculating all of the letter frequencies, then finding the maximum letter frequency, and finally find printing all the characters to have that maximum frequency.
Here is my solution. I commented it to make it clearer:
#include <stdio.h>
#include <ctype.h>
int main() {
char text[200] = { 0 };
size_t freq[26] = { 0 };
// get the user's text
fgets(text,sizeof(text),stdin);
const size_t len = strlen(text);
// calculate frequencies
for (size_t i = 0; i < len; ++i)
if (text[i] >= 'a' && text[i] <= 'z')
++freq[text[i]-'a'];
size_t maxCount = 0;
// find the maximum frequency and print each letter's frequency
for (size_t i = 0; i < sizeof(freq); ++i) {
if (freq[i] > maxCount)
maxCount = freq[i];
printf("%c occurs %d times.\n", i+'a', freq[i]);
}
printf("\n\n"); // padding
// Print all characters with the maximum frequency
for (size_t i = 0; i < sizeof(freq)/sizeof(freq[0]); ++i)
if (freq[i] == maxCount)
printf("%c occurs with maximum frequency.\n", 'a'+i);
}
EDIT: You could also extend your program by making it work with letters of any case by using the tolower() function from libc's ctype.h header file to make all characters lowercase when calculating their frequencies.
The line ++freq[text[i]-'a']; would become ++freq[tolower(text[i])-'a'];.

Here is a simpler version:
#include <stdio.h>
int main() {
char text[200];
int freq[26] = { 0 };
int max_freq = 0;
// read use input or use the empty string
if (!fgets(text, sizeof(text), stdin))
*text = '\0';
// compute frequencies, update maximum frequency
for (int i = 0; text[i] != '\0'; i++) {
if (text[i] >= 'a' && text[i] <= 'z') {
int index = text[i] - 'a'; // assuming ASCII
if (++freq[index] > max_freq)
max_freq = freq[index]; // update maximum frequency
}
}
// print characters with maximum frequency in alphabetical order
for (int i = 0; i < 26; i++) {
if (freq[i] == max_freq)
putchar('a' + i);
}
putchar('\n');
return 0;
}

Generally, when wanting the frequency of any object within a specified range, you want your frequency array to cover the entire range of possible values for that object. For a full explanation of what is frequency array is, and how to use it, a fuller explanation is provided in answer to How to remove duplicate char in string in C
In this case for ASCII characters, you have a range of 128 possible value (or 256 if you include the extended ASCII set), See ASCII Table & Description. By creating your frequency array with 128 (or 256) elements, you eliminate having to check for special cases such as lower/uppercase characters.
For example, you could cover all ASCII characters and find the most frequent a user may enter with the following:
#include <stdio.h>
#include <ctype.h>
#define SIZE 256 /* size works for both frequency array and text as a multiple */
int main (void) {
char text[SIZE * 4]; /* 1024 byte buffer */
int freq[SIZE] = {0}, max = 0; /* frequeny array & max */
fputs ("enter string: ", stdout); /* prompt */
if (!fgets (text, SIZE, stdin)) { /* read/validate EVERY input */
puts ("(user canceled input)");
return 0;
}
for (int i = 0; text[i]; i++) /* loop over each char */
if (++freq[(unsigned char)text[i]] > max) /* increment/check against max */
max = freq[(unsigned char)text[i]];
printf ("\nmost frequent appear %d times: ", max);
for (int i = '!'; i < SIZE; i++) /* loop over freq array */
if (freq[i] == max) /* if count == max */
putchar (i); /* output char */
putchar ('\n'); /* tidy up with newline */
}
(note: since a user may validly cancel input generating a manual EOF by pressing Ctrl + d, or Ctrl + z on windows, it is good to validate every input and handle the EOF case as well as any other possible failure modes)
Since ASCII characters below ' ' (space, 32, 0x20) are non-printable or whitespace such as '\t', '\n', \r, etc.., you can begin your output loop with the '!' character to ignore whitespace. If you need the frequency of every character, you can create a short lookup table with the string representations of each non-printable or whitespace character. That way if the ' ' character were the, or one of the, most frequent characters, you could output, e.g. "(sp)" or something similar, "(tab)" for tab, etc..
Example Use/Output
$ ./bin/badfreq
enter string: 123+HH~helloo+345
most frequent appear 2 times: +3Hlo
Saving Most Frequent Characters To Array
There are very few changes needed to buffer the most frequently occurring characters in an array instead of directly outputting them with putchar(). All you need to do is declare an array of sufficient size to hold the maximum number of possible characters (+1 to allow space for a nul-terminating character if you wish to treat the array as a C-string.
Below we add the buffer (character array) most_freq[] to hold the most frequently used characters, and the fill the most_freq[] array where we were simply outputting it in the first example, e.g.
#include <stdio.h>
#include <ctype.h>
#define SIZE 256 /* size works for both frequency array and text as a multiple */
int main (void) {
char text[SIZE * 4], /* 1024 byte read buffer */
most_freq[SIZE + 1] = ""; /* 257 byte buffer for storage */
int freq[SIZE] = {0}, /* frequeny array */
max = 0, /* times most frequent occurs */
mf_ndx = 0; /* most_frequent index */
fputs ("enter string: ", stdout); /* prompt */
if (!fgets (text, SIZE, stdin)) { /* read/validate EVERY input */
puts ("(user canceled input)");
return 0;
}
for (int i = 0; text[i]; i++) /* loop over each char */
if (++freq[(unsigned char)text[i]] > max) /* increment/check against max */
max = freq[(unsigned char)text[i]];
for (int i = '!'; i < SIZE; i++) /* loop over freq array */
if (freq[i] == max) /* if count == max */
most_freq[mf_ndx++] = i; /* store in most_freq array */
most_freq[mf_ndx] = 0; /* nul-terminate as string */
printf ("\n%d most frequent chars appear %d times: %s\n", /* output results */
mf_ndx, max, most_freq);
}
Following exit of the loop we use to fill the array, we add the nul-termianting character '\0' (same as plain old ASCII 0) after the last character added so we can then treat the most_freq[] array as a string.
Example Use/Output
This does allow a simple way to provide a bit more information in the output, e.g.
$ ./bin/most_freq_array
enter string: 123+HH~helloo+345
5 most frequent chars appear 2 times: +3Hlo
Or in your specific example case of "helloo":
$ ./bin/most_freq_array
enter string: helloo
2 most frequent chars appear 2 times: lo
Look things over and let me know if your have further questions.

Related

Counting occurrences of words within an inputted string in c

I'm currently struggling with counting the occurrences of the words within an inputted string. I believe it is just my logic that is off but I've been scratching my head for a while and I've just hit a wall.
The problems I'm currently yet to solve are:
With longer inputs the ends of the string is sometimes cut off.
Incrementing the counter for each word when repeated
I know the code has things that may not be the most ideal way for it to work but I'm fairly new to C so any pointers are really helpful.
To sum it up I'm looking for pointers to help solve the issues I'm facing above
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#define MAX_WORDS 1000
int main(void) {
int i,j,isUnique,uniqueLen;
char word[MAX_WORDS];
char words[200][30];
char uniqueWords[200][30];
int count[200];
char *p = strtok(word, " ");
int index=0;
//read input until EOF is reached
scanf("%[^EOF]", word);
//initialize count array
for (i = 0; i < 200; i++) {
count[i] = 0;
}
//convert lower case letters to upper
for (i = 0; word[i] != '\0'; i++) {
if (word[i] >= 'a' && word[i] <= 'z') {
word[i] = word[i] - 32;
}
}
//Split work string into an array and save each token into the array words
p = strtok(word, " ,.;!\n");
while (p != NULL)
{
strcpy(words[index], p);
p = strtok(NULL, " ,.;!\n");
index++;
}
/*
Check each string in the array word for occurances within the uniqueWords array. If it is unique then
copy the string from word into the unique word array. Otherwise the counter for the repeated word is incremented.
*/
uniqueLen = 0;
for (i = 0; i < index; i++) {
isUnique = 1;
for (j = 0; j < index; j++) {
if (strcmp(uniqueWords[j],words[i])==0) {
isUnique = 0;
break;
}
else {
}
}
if (isUnique) {
strcpy(uniqueWords[uniqueLen], words[i]);
count[uniqueLen] += 1;
uniqueLen++;
}
else {
}
}
for (i = 0; i < uniqueLen; i++) {
printf("%s => %i\n", uniqueWords[i],count[i]);
}
}
This is the code i ended up using, this turned out to be mainly an issue with using the scanf function. Placing it in a while loop made it much easier to edit words as inputted.
Thankyou for all the help :)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
int main(void) {
// Create all variables
int i, len, isUnique, index;
char word[200];
char uniqueWords[200][30];
int count[200];
// Initialize the count array
for (i = 0; i < 200; i++) {
count[i] = 0;
}
// Set the value for index to 0
index = 0;
// Read all words inputted until the EOF marker is reached
while (scanf("%s", word) != EOF) {
/*
For each word being read if the characters within it are lowercase
then each are then incremented into being uppercase values.
*/
for (i = 0; word[i] != '\0'; i++) {
if (word[i] >= 'a' && word[i] <= 'z') {
word[i] = word[i] - 32;
}
}
/*
We use len to find the length of the word being read. This is then used
to access the final character of the word and remove it if it is not an
alphabetic character.
*/
len = strlen(word);
if (ispunct(word[len - 1]))
word[len - 1] = '\0';
/*
The next part removes the non alphabetic characters from within the words.
This happens by incrementing through each character of the word and by
using the isalpha and removing the characters if they are not alphabetic
characters.
*/
size_t pos = 0;
for (char *p = word; *p; ++p)
if (isalpha(*p))
word[pos++] = *p;
word[pos] = '\0';
/*
We set the isUnique value to 1 as upon comparing the arrays later we
change this value to 0 to show the word is not unique.
*/
isUnique = 1;
/*
For each word through the string we use a for loop when the counter i
is below the index and while the isUnique value is 1.
*/
for (i = 0; i < index && isUnique; i++)
{
/*
Using the strcmp function we are able to check if the word in
question is in the uniqueWords array. If it is found we then
change the isUnique value to 0 to show that the value is not
unique and prevent the loop happening again.
*/
if (strcmp(uniqueWords[i], word) == 0)
isUnique = 0;
}
/* If word is unique then add it to the uniqueWords list
and increment index. Otherwise increment occurrence
count of current word.
*/
if (isUnique)
{
strcpy(uniqueWords[index], word);
count[index]++;
index++;
}
else
{
count[i - 1]++;
}
}
/*
For each item in the uniqueWords list we iterate through the words
and print them out in the correct format with the word and the following count of them.
*/
for (i = 0; i < index; i++)
{
printf("%s => %d\n", uniqueWords[i], count[i]);
}
}
I don't know if you are facing some requirements, but for all it's limitations in terms of standard library functions, C does have one that would make your job much easier, strstr, e.g.:
Live demo
#include <stdio.h>
#include <string.h>
int main() {
const char str[] = "stringstringdstringdstringadasstringipoistring";
const char* substr = "string";
const char* orig = str;
const char* temp = substr;
int length = 0;
while(*temp++){length++;} // length of substr
int count = 0;
char *ret = strstr(orig, substr);
while (ret != NULL){
count++;
//check next occurence
ret = strstr(ret + length, substr);
}
printf("%d", count);
}
The output should be 6.
Regarding user3121023's comment, scanf("%999[^\n]", word); parses all characters until it finds a \n or it reaches the width limit, and I agree fgets ( word, sizeof word, stdin); is better.

How do I remove the "incompatible pointer type" warning from my code?

This code reads an input text file, and creates an output file based on its contents.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define OUT 0
#define IN 1
#define MAX 28
#define BLOCK 4000
/* Check whether the character is alphanumeric */
int isAlphanumeric(char c) {
return ('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9');
}
int main(int argc, char *argv[]) {
int c, state = OUT, length = 0, i, j, counter[MAX];
char word[30], longest_word[30];
FILE *input, *output; /* FILE pointers to open the file */
/* Initialize the counter */
for (i = state; i < MAX; i++)
counter[i] = 0;
/* Open the file */
input = fopen("complete_shakespeare.txt", "r");
output = fopen("word_length_histogram.txt", "w");
/* Keep reading the character in the file */
while ((c = getc(input)) != EOF) {
/* If the character is alphanumeric, record it */
if (isAlphanumeric(c)) {
strncat(word, &c, 1);
}
/* If the character is not alphanumeric, increment the corresponding counter, and additionally, record longest word. */
else {
length = strlen(word);
if (length == 27) strcpy(longest_word, word);
counter[length] += 1;
memset(word, 0, sizeof(word));
}
}
/* If the file ends with a word, record its length */
if (isAlphanumeric(word[0])){
length = strlen(word);
counter[length] += 1;
}
/* print the longest word to the file */
fprintf(output, "%s\n\n", longest_word);
/* Make the histogram */
for (i = 1; i < MAX; i++) {
int dividend = counter[i] / 4000 + 1;
fprintf(output, "%2d %6d ", i, counter[i]);
for (j = dividend; j >= 1; j--){
if (counter[i] != 0)
fprintf(output, "*");
}
fprintf(output, "\n");
}
/* Don't forget to close the FILEs */
fclose(input);
fclose(output);
return 0;
}
It produces the correct output file, but this error comes up whenever I compile it.
B:\CodeBlocks\Projects\Programming in C\hw_4\Homework_4\main.c|44|warning: passing argument 2 of 'strncat' from incompatible pointer type [-Wincompatible-pointer-types]|
The warning seems to come from the only line with strncat. Does anyone know how this can be remedied?
The variable c is declared as having the type int.
int c, state = OUT, length = 0, i, j, counter[MAX];
^^^^^^
So the expression &c used in this call
strncat(word, &c, 1);
has the type int * instead of the type char *.
There is no sense to call strncat for one character. Moreover the array word has indeterminate values because it was not initialized.
char word[30], longest_word[30];
You could write
char word[30], longest_word[30];
word[0] = '\0';
And then something like the following
size_t n = 0;
while ((c = getc(input)) != EOF) {
/* If the character is alphanumeric, record it */
if (isAlphanumeric(c)) {
word[n] = ( char )c;
word[++n] = '\0';
}
/* If the character is not alphanumeric, increment the corresponding counter, and additionally, record longest word. */
else {
if (n == 27) strcpy(longest_word, word);
counter[n] += 1;
n = 0;
word[n] = '\0';
}
}
That is, the variable n will keep track of the current length of the string stored in the array word.

Why is there an extra line after my output?

#include <stdio.h>
#include <ctype.h>
int main()
{
char str[100];
char splitStrings[10][10];
int i, j, cnt;
printf("Enter a sentence, up to 255 charcters: \n");
fgets(str, sizeof str, stdin);
j = 0; cnt = 0;
for (i = 0; i <= (strlen(str)); i++)
{
if (!ispunct(str[i]) == ' ' || isalpha(str[i]) == '\0')
{
splitStrings[cnt][j] = '\0';
cnt++; //for next word
j = 0; //for next word, init index to 0
}
else
{
splitStrings[cnt][j] = str[i];
j++;
}
}
for (i = 0; i < cnt; i++)
printf("\n%s %d \n", splitStrings[i], strlen(splitStrings[i]));
return 0;
}
Here is my code, I am trying to input a sentence and it will spilt up the string by words and count the number of letter. But it appear there an additional 0 in my output? And how do I get rid of it
output
fgets() will put a newline character it read into the buffer when the input is short enough to fit (as in the example data).
The newline character is not an alphabet, so isalpha(str[i]) == '\0' will become true and it moves on next word.
Then, the next charcter is terminating null-character. (it is processed because the loop condition is i <= (strlen(str)))
It is also not an alphabet, so it also moves on next word.
There are no characters between the newline character and the terminating null-character, so it is printed as zero-character word.

I want my code to print the frequency of characters in the order of appearance of string

My code is printing the frequency of characters in random order. What can be done so that it prints the frequency of characters in order in which the word is given. My current code is as follows
#include <stdio.h>
#include <conio.h>
void main() {
char string1[50];
int i = 0, counter[26] = { 0 };
printf("\nEnter a string\n");
//Inputs a string
gets(string1);
while (string1[i] != '\0') {
//checks and includes all the characters
if (string1[i] >= 'a' && string1[i] <= 'z') {
//counts the frequency of characters
counter[string1[i] - 'a']++;
i++;
}
}
//printing frequency of each character
for (i = 0; i < 26; i++) {
if (counter[i] != 0)
printf("%c occurs %d times.\n", i + 'a', counter[i]);
}
getch();
}
sample output:
There are several issues in your code:
you use gets: this function is unsafe, it was removed from the current version of the C Standard.
you increment i only for if string1[i] is a lowercase letter: you will run an infinite loop if you type any other character.
the proper prototype for main is either int main(void) or int main(int arc, char *argv[]).
you only count lower case letters. H is upper case, thus not counted.
Here is an improved version:
#include <stdio.h>
#include <ctype.h>
int main(void) {
char string1[128];
int i = 0, counter[256] = { 0 };
printf("\nEnter a string\n");
//Inputs a string
if (fgets(string1, sizeof string1, stdin) == NULL) {
// empty file: got an empty line
*string1 = '\0';
}
for (i = 0; string1[i] != '\0'; i++) {
if (isalpha((unsigned char)string1[i])) {
//counts the frequency of letters
counter[string1[i]]++;
}
}
//printing frequency of each counted character
//characters are printed in the order of appearance
for (i = 0; string1[i] != '\0'; i++) {
if (counter[string1[i]] != 0) {
printf("%c occurs %d times.\n",
string1[i], counter[string1[i]]);
counter[string1[i]] = 0; // print each letter once.
}
}
getch();
return 0;
}
You can get the characters printed in order of their appearance by using the string a second time to generate the output.
In your section where you are "printing the frequency of each character", use the code to process the input string. This time, if the frequency value is not zero, print it and then reset the frequency value to zero. If the frequency value is zero, you must have already printed it so do nothing.
//printing frequency of each counted character (in input order)
for (i = 0; string1[i] != '\0'; i++) {
char ch = string[i];
if (counter[ch - 'a'] != 0) {
printf("%c occurs %d times.\n", ch, counter[ch - 'a']);
counter[ch - 'a'] = 0;
}
}

C - Find most frequent element in char array

i'm developing a little function to display the most frequent character in a (char) array.
This is what I've accomplished so far, but I think i'm on the wrong way.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char test[10] = "ciaociaoci";
max_caratt(test, 10);
}
int max_caratt(char input[], int size)
{
int i;
char max[300];
max[0] = input[0];
for (i=0; i<size; i++)
{
if(strncmp(input,input[i],1) == 1)
{
printf("occourrence found");
max[i] = input[i];
}
}
}
Any help?
Actually, the correct code is this.
It's just a corrected version of IntermediateHacker's below snippet.
void main()
{
int array[255] = {0}; // initialize all elements to 0
char str[] = "thequickbrownfoxjumpedoverthelazydog";
int i, max, index;
for(i = 0; str[i] != 0; i++)
{
++array[str[i]];
}
// Find the letter that was used the most
max = array[0];
index = 0;
for(i = 0; str[i] != 0; i++)
{
if( array[str[i]] > max)
{
max = array[str[i]];
index = i;
}
}
printf("The max character is: %c \n", str[index]);
}
The easiest way to find the most common character is to create an int array of 255 and just increment the arraly element that corresponds to the character. For example: if the charcter is 'A', then increment the 'A'th element (if you look at any ascii table you will see that the letter 'A' has a decimal value of 65)
int array[255] = {0}; // initialize all elements to 0
char str[] = "The quick brown fox jumped over the lazy dog.";
int i, max, index;
// Now count all the letters in the sentence
for(i = 0; str[i] != 0; i++)
{
++array[str[i]];
}
// Find the letter that was used the most
max = array[0];
index = 0;
for(i = 0; str[i] != 0; i++)
{
if( array[i] > max)
{
max = array[i];
index = i;
}
}
printf("The max character is: %c \n", (char)index);
You're passing a (almost) string and a char to strncmp(). strncmp() takes two strings (and an integer). Your program shouldn't even compile!
Suggestion: increase the warning level of your compiler and mind the warnings.
You may want to look at strchr() ...
Assuming an input array of 0-127, the following should get you the most common character in a single pass through the string. Note, if you want to worry about negative numbers, shift everything up by +127 as needed...
char mostCommonChar(char *str) {
/* we are making the assumption that the string passed in has values
* between 0 and 127.
*/
int cnt[128], max = 0;
char *idx = str;
/* clear counts */
memset((void *)cnt, 0, sizeof(int) * 128);
/* collect info */
while(*idx) {
cnt[*idx]++;
if(cnt[*idx] > cnt[max]) {
max = *idx;
}
idx++;
}
/* we know the max */
return max;
}
If you don't need to preserve the input array, you could sort the input array first, then find the longest contiguous run of a single character. This approach is slower, but uses less space.
I made a working version using structs. It works fine, I guess, but I think there's a MUCH better way to write this algorithm.
#include <stdio.h>
#include <stdlib.h>
struct alphabet {
char letter;
int times;
};
typedef struct alphabet Alphabet;
void main() {
char string[300];
gets(string);
Alphabet Alph[300];
int i=0, j=0;
while (i<=strlen(string)) {
while(j<=300) {
if(string[i] != Alph[j].letter) {
Alph[i].letter = string[i];
Alph[i].times = 1;
}
else {
Alph[j].times++;
}
j++;
}
j=0;
i++;
}
int y,max=0;
char letter_max[0];
for (y=0; y<strlen(string); y++) {
printf("Letter: %c, Times: %d \n", Alph[y].letter, Alph[y].times);
if(Alph[y].times>max) {
max=Alph[y].times;
letter_max[0]=Alph[y].letter;
}
}
printf("\n\n\t\tMost frequent letter: %c - %d times \n\n", letter_max[0], max);
}
I saw you all creating big arrays and "complex" stuff so here I have easy and simple code xD
char most_used_char (char s[]) {
int i; //array's index
int v; //auxiliary index for counting characters
char c_aux; //auxiliary character
int sum = 0; //auxiliary character's occurrence
char c_max; //most used character
int max = 0; //most used character's occurrence
for (i = 0; s[i]; i++) {
c_aux = s[i];
for (v = 0; s[v]; v++)
if (c_aux == s[v]) sum++; /* responsible cycle for counting
character occurrence */
if (sum > max) { //checks if new character is the most used
max = sum;
c_max = c_aux;
}
sum = 0; /* reset counting variable so it can counts new
characters occurrence */
}
return c_max; //this is the most used character!
}

Resources