Most common character in a file in C - c

I'm doing my C programming course homework and I need to find a most common character in given file.
My testing with a testfile, emptyfile and other small amount text files works great (or at least I think so), but in the last long testfile something goes wrong and the error message is: "Should have returned 'e' (101) for file rfc791.txt. You returned 'b' (98)".
So what I'm asking that what might be wrong with my code, when suddenly the most common letter is not what is should be?
int most_common_character(char *filename) {
FILE *f;
if ((f = fopen(filename, "r")) == NULL) {
fprintf(stderr, "Not opened: %s\n", strerror(errno));
return -1;
}
char frequency[26];
int ch = fgetc(f);
if (ch == EOF) {
return 0;
}
for (ch = 0; ch < 26; ch++) {
frequency[ch] = 0;
}
while (1) {
ch = fgetc(f);
if (ch == EOF) {
break;
}
if ('a' <= ch && ch <= 'z') {
frequency[ch - 'a']++;
}
else if ('A' <= ch && ch <= 'Z') {
frequency[ch - 'A']++;
}
}
int maxCount = 0;
int maxChar = 0;
for (int i = 0; i <= 26; ++i) {
if (frequency[i] > maxCount) {
maxCount = frequency[i];
maxChar = i;
}
}
fclose(f);
return maxChar + 'a';
}
I would be very grateful if someone has any hints to fix my code :) I've tried to search the solution to this problem from many other related topics but nothing seems to work.

You should use < operator in the second for loop. Because of that when you are checking frequency[i] > maxCount, at frequency[26] it behaves undefined behaviour, meaning the value at that index may be less or higher than the compared value.

Your code do have some problems. However, they are so tiny so the code still works well with small tests.
int ch = fgetc(f); drop the first char in the file
for (int i = 0; i <= 26; ++i) break out of the array 's range (only from 0-->25)
Beside these small mistakes, your code is awesomely fine. Well done #thumbsup

Loop runs out-of-bounds. #Weather Vane
// for (int i = 0; i <= 26; ++i) {
for (int i = 0; i < 26; ++i) {
Code throws away result of the first character. #BLUEPIXY
int ch = fgetc(f);
if (ch == EOF) {
return 0;
}
// This value of ch is not subsequently used.
Other fixes as below
int most_common_character(char *filename) {
...
// Use a more generous count #Weather Vane
// char frequency[26];
// Consider there may be more than 26 different letters
// fgetc return EOF and value in the unsigned char range
int frequency[UCHAR_MAX + 1] = { 0 };
// Not needed as array was initialize above
// for (ch = 0; ch < 26; ch++) { frequency[ch] = 0; }
// BTW correct type declaration of int, avoided rookie mistake of using char
int ch;
// Codes use tolower(), islower() as that is the portable way to
// handle type-of-character detection
while ((ch = fgetc(f)) != EOF) {
frequency[tolower(ch)]++; // could add check to insure frequency[] does not overflow
}
int maxCount = 0;
int maxChar = -1;
for (int i = 0; i <= UCHAR_MAX; ++i) {
if (islower(i) && frequency[i] > maxCount) {
maxCount = frequency[i];
maxChar = i;
}
}
fclose(f);
return maxChar;
}

Related

Write a program to break long input lines into two or more shorter lines of length at most n

I am currently learning C and working on a problem that breaks input lines into lengths of n. Below is my current code where n is set to 30. When it reaches the n-th index it replaces that index with ' ' and then line breaks, but it will only do it for the first n characters and I'm unsure what isn't getting rest in order to it to continue making a new line at the nth index.
int getline2(void);
int c, len, cut, counter;
char line[MAXLINE];
main() {
while ((len = getline2()) > 0) {
if (len > BREAK) {
c = 0;
counter = 0;
while (c < len) {
if (line[c] == ' ') {
counter = c;
}
if (counter == BREAK) {
line[counter] = '\n';
counter = 0;
}
counter++;
c++;
}
}
printf("%s", line);
}
return 0;
}
int getline2(void) {
int c, i;
extern char line[];
for (i = 0; i < MAXLINE - 1 && (c = getchar()) != EOF && c != '\n'; ++i)
line[i] = c; //i gets incremented at the end of the loop
if (c == '\n') {
line[i] = c;
++i;
}
line[i] = '\0';
return i;
}
Your code is a little too complicated:
you do not need to store the bytes read from the file into an array, just output them one at a time, keeping track of the line length
when the line would become too long, output a newline and reset the count before you output the byte.
also not that none of these global variables deserves to be global.
and the prototype for main should be either int main(), int main(void) or int main(int argc, char *argv[]) or equivalent. main()` is an obsolete syntax that should be avoided.
Here is a modified version:
#include <stdio.h>
#define BREAK 30
int main() {
int c;
int len = 0;
while ((c = getchar()) != EOF) {
if (c == '\n') {
putchar(c);
len = 0;
} else {
if (len >= BREAK) {
putchar('\n');
len = 0;
}
putchar(c);
len++;
}
}
return 0;
}

Get length of char array with null elements in C

Currently I am making a project that uses char arrays that have null elements. I want to be able to get the length of the array, in the sense of the number of elements that aren't null. This seemed reasonably trivial and I made this function:
int getWordLen(char word[]) {
int count = 0;
for (int i = 0; i < 512; i++) {
if (word[i] != '\0') {
count++;
}
}
printf("%d ", count);
return count;
}
However, every char array returns a length of 188. Any help would be appreciated.
This is the function I was calling it from:
void redact(Words * redactWords, char fileName[]) {
FILE * file = fopen(fileName, "r");
FILE * outputFile = fopen("outputFile.txt", "w+");
char word[512];
int i = 0;
char c;
while (c != EOF) {
c = getc(file);
if ((c > 96) && (c < 123)) {
word[i] = c;
i++;
continue;
}
else if ((c > 64) && (c < 91)) {
word[i] = c + 32;
i++;
continue;
}
i = 0;
if (isWordRedactWord(redactWords, word)) {
//write stars to file
char starStr[512];
for (int i = 0; i < getWordLen(word); i++) {
starStr[i] = '*';
}
fputs(starStr, outputFile);
}
else {
//write word to file
fputs(word, outputFile);
}
strcpy(word, emptyWord(word));
}
fclose(file);
fclose(outputFile);
}
In the initial while, I would only use while(!EOF).
Also, I believe you are using a lot more resources than necessary with the implementation of that for inside the while:
char starStr[512];
for (int i = 0; i < getWordLen(word); i++) {
starStr[i] = '*';
I suggest you to put it outside the while loop and see what happens.
If it is always giving you 188 of lenght, it is counting something that's constant, and may be related to that outer loop.
Hope you can solve it!

Replacing three 'a' in with a single '*' in a string

So my program should get input from an user and store it in an array. After that if the input string includes three 'a's in a row it should be replaced with a single '*'. However I can't seem to get it right. It only replaces the first a with a *. I tried to replace the following 2 a with a blank but the output looks funny.
For this exercise I have to use putchar() and getchar().
Thank you in advance.
#include <stdio.h>
char c;
char buffer[256];
int counter= 0;
int i;
int main()
{
while ((c = getchar()) != '\n') {
buffer[counter] =c;
counter++;
if (counter >255) {
break;
}
}
for(i=0; i<256; i++) {
if(buffer[i]== 'a'&&buffer[i+1]=='a'&&buffer[i+2]=='a')
{
buffer[i]= '*';
buffer[i+1]=' ';
buffer[i+2]=' ';
}
putchar(buffer[i]);
}
putchar('\n');
return 0;
}
So my program should get input from an user and store it in an array.
After that if the input string includes three 'a's in a row it should
be replaced with a single '*'. However I can't seem to get it right.
You almost got it! Just move index by 2 to and continue.
#include <stdio.h>
char c;
char buffer[256];
int counter= 0;
int i;
int main(void)
{
while ((c = getchar()) != '\n') {
buffer[counter] =c;
counter++;
if (counter >= 255) {
break;
}
}
buffer[counter] ='\0';
for(i=0; i<256; i++) {
if(buffer[i]== 'a'&&buffer[i+1]=='a'&&buffer[i+2]=='a')
{
buffer[i]= '*';
putchar(buffer[i]);
i = i + 2;
continue;
}
putchar(buffer[i]);
}
putchar('\n');
return 0;
}
Test:
123aaa456aaa78
123*456*78
In string you must assign a end of character at the end and that is call null character \0 or just a numeric 0. Correct your code like below:-
while ((c = getchar()) != '\n') {
buffer[counter] =c;
counter++;
if (counter >=255) {
break;
}
}
buffer[counter] ='\0';// or buffer[counter] =0;
To avoid side effect in a string array always set all its value with 0 first:-
char buffer[256];
memset(buffer, 0, sizeof(buffer));
If you want to change the number of characters, you will need to create a different buffer to copy the output to.
If you really just want to output to the console, you could just write every character until you hit your matching string.
#include <stdio.h>
char c;
char buffer[256];
char output[256];
int counter= 0;
int i, j;
int main()
{
while ((c = getchar()) != '\n') {
buffer[counter] = c;
counter++;
if (counter >255) {
break;
}
}
buffer[counter] = 0;
for(i=0, j=0; i<256; i++, j++) {
if(buffer[i] == 'a' && buffer[i+1] == 'a'&& buffer[i+2] == 'a')
{
output[j]= '*';
i += 2;
}
else
output[j] = buffer[i];
putchar(output[j]);
}
putchar('\n');
return 0;
}
There are multiple problems in your code:
there is no reason to make all these variables global. Declare them locally in the body of the main function.
use int for the type of c as the return value of getchar() does not fit in a char.
you do not check for EOF.
your test for buffer overflow is off by one.
you do not null terminate the string in buffer. You probably make buffer global so it is initialized to all bits 0, but a better solution is to set the null terminator explicitly after the reading loop.
to replace a sequence of 3 characters with a single one, you need to copy the rest of the string.
You can use a simple method referred as the 2 finger approach: you use 2 different index variables into the same array, one for reading, one for writing.
Here is how it works:
#include <stdio.h>
int main() {
char buffer[256];
int c;
size_t i, j, counter;
for (counter = 0; counter < sizeof(buffer) - 1; counter++) {
if ((c = getchar()) == EOF || c == '\n')
break;
buffer[counter] = c;
}
buffer[counter] = '\0';
for (i = j = 0; i < counter; i++, j++) {
if (buffer[i] == 'a' && buffer[i + 1] == 'a' && buffer[i + 2] == 'a') {
buffer[j] = '*';
i += 2;
} else {
buffer[j] = buffer[i];
}
}
buffer[j] = '\0'; /* set the null terminator, the string may be shorter */
printf("modified string: %s\n", buffer);
return 0;
}

Detecting EOF of a txt File in C

I wrote this code which reads every char of my text and puts it into my char array. My Problem is that the end of the file is not detected and so the fscanf() returns after the end of the text every time the last char until my array is filled. How can I prevent that? I am programming in C.
My Code:
int main() {
char array[50][50];
char buff;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
fscanf(cola, "%c", &buff);
array[i][k] = buff;
}
}
fclose(cola);
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
printf("%c", array[i][k]);
}
}
return 0;
}
Thank you for your help.
fscanf() returns the number of successful conversions. You should test the return value and also handle newline characters specifically:
#include <stdio.h>
int main(void) {
char array[50][50];
char buff;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL) {
return 1;
}
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
if (fscanf(cola, "%c", &buff) != 1 || buff == '\n') {
array[i][k] = '\0';
break;
}
array[i][k] = buff;
}
}
fclose(cola);
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50 && array[i][k] != '\0'; k++) {
printf("%c", array[i][k]);
}
printf("\n");
}
return 0;
}
The code can be simplified if you use getc() instead of fscanf() to read bytes from the file:
#include <stdio.h>
int main(void) {
char array[50][51];
int c, i, k, n;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL) {
return 1;
}
for (n = 0; n < 50; n++) {
for (k = 0; k < 50; k++) {
if ((c = getc(cola)) == EOF || c == '\n') {
break;
}
array[n][k] = c;
}
array[n][k] = '\0';
if (c == EOF && k == 0)
break;
}
fclose(cola);
for (i = 0; i < n; i++) {
puts(array[i]);
}
return 0;
}
Replace:
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
fscanf(cola, "%c", &buff);
array[i][k] = buff;
}
}
with:
for (int i = 0; i < 50; i++) {
for (int k = 0; k < 50; k++) {
int c = getc(cola);
if (c == EOF)
break;
array[i][k] = c;
}
}
Since buff is then unused, don't define it. Note that the return type of getc() is an int, not just a char. Always check the I/O function for success/failure. In your original code, you don't even check whether the I/O operation succeeds, which makes detecting EOF impossible.
Note that this code makes a number of assumptions that may or may not be justifiable. For example, you assume each line in the file consists of 49 characters plus a newline; you also assume you'll never need to print the information as a 'string' (your existing code does not; it prints character by character, so it is 'safe').
You might want to describe the input as:
Read up to 50 lines with up to 49 characters plus a newline in each line, storing the result in the variable array with each line being a null-terminated string.
This is more resilient to common problems (short lines, long lines, not enough lines). The code for that might be:
enum { LINE_LEN = 50, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i;
for (i = 0; i < LINE_LEN; i++)
{
int c;
int k;
for (k = 0; k < LINE_LEN; k++)
{
c = getc(cola);
if (c == EOF || c == '\n')
break;
if (k == LINE_LEN - 1)
{
/* Too long - gobble excess */
while ((c = getc(cola)) != EOF && c != '\n')
;
break;
}
array[i][k] = c;
}
array[i][k] = '\0';
if (c == EOF)
break;
}
int num_lines = i; // You have num_lines lines of data in your array
I found one version of the Coca Cola™ ASCII art image at https://www.ascii-code.com/ascii-art/logos/coca-cola.php which looks similar to what you have in your images, but there are many other sources and variants:
__ ___ __ .ama ,
,d888a ,d88888888888ba. ,88"I) d
a88']8i a88".8"8) `"8888:88 " _a8'
.d8P' PP .d8P'.8 d) "8:88:baad8P'
,d8P' ,ama, .aa, .ama.g ,mmm d8P' 8 .8' 88):888P'
,d88' d8[ "8..a8"88 ,8I"88[ I88' d88 ]IaI" d8[
a88' dP "bm8mP8'(8'.8I 8[ d88' `" .88
,88I ]8' .d'.8 88' ,8' I[ ,88P ,ama ,ama, d8[ .ama.g
[88' I8, .d' ]8, ,88B ,d8 aI (88',88"8) d8[ "8. 88 ,8I"88[
]88 `888P' `8888" "88P"8m" I88 88[ 8[ dP "bm8m88[.8I 8[
]88, _,,aaaaaa,_ I88 8" 8 ]P' .d' 88 88' ,8' I[
`888a,. ,aadd88888888888bma. )88, ,]I I8, .d' )88a8B ,d8 aI
"888888PP"' `8""""""8 "888PP' `888P' `88P"88P"8m"
This file's longest line is the first at 67 characters plus newline; the shortest is 61 characters plus newline. The file only has 13 lines and 845 characters (LF line endings) in total. Thus, your program is ill-equipped to deal with this particular data file. It looks for 2,500 characters, and won't get them.
My complete test code was rigged to read from standard input, rather than a fixed file name.
#include <stdio.h>
int main(void)
{
FILE *cola = stdin;
enum { LINE_LEN = 80, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i; // Need value of i after loop
for (i = 0; i < NUM_LINES; i++)
{
int c; // Need value of c after loop
int k;
for (k = 0; k < LINE_LEN; k++)
{
c = getc(cola);
if (c == EOF || c == '\n')
break;
if (k == LINE_LEN - 1)
{
/* Too long - gobble excess */
while ((c = getc(cola)) != EOF && c != '\n')
;
break;
}
array[i][k] = c;
}
array[i][k] = '\0';
if (c == EOF)
break;
}
int num_lines = i; // You have num_lines lines of data in your array
for (i = 0; i < num_lines; i++)
puts(array[i]);
return 0;
}
I tested it on the data file shown, with an empty line at the end, and with a couple of lines containing more than 79 characters after the blank line. It handled all those special cases correctly. Note that handling user input is hard; handling perverse user input is harder. The code is less compact. You could change the rules and then change the code to match. I'm not sure this is the most minimal way to code this; it does work, however. It might be better to have a function to handle the inner input loop; the outer loop could test the return value from that function. This would cut down on the special case handling.
#include <assert.h>
#include <limits.h>
#include <stdio.h>
static int read_line(FILE *fp, size_t buflen, char *buffer)
{
assert(buflen < INT_MAX);
int c; // Need value of c after loop
size_t k; // Need value of k after loop
for (k = 0; k < buflen; k++)
{
if ((c = getc(fp)) == EOF || c == '\n')
break;
if (k == buflen - 1)
{
/* Too long - gobble excess */
while ((c = getc(fp)) != EOF && c != '\n')
;
break;
}
buffer[k] = c;
}
buffer[k] = '\0';
return (k == 0 && c == EOF) ? EOF : (int)k;
}
int main(void)
{
enum { LINE_LEN = 80, NUM_LINES = 50 };
char array[NUM_LINES][LINE_LEN];
int i;
for (i = 0; i < NUM_LINES; i++)
{
if (read_line(stdin, LINE_LEN, array[i]) == EOF)
break;
}
int num_lines = i;
for (i = 0; i < num_lines; i++)
puts(array[i]);
return 0;
}
This produces the same output from the same input as the previous version.
int main() {
//char array[50][50];
char buff;
int t;
FILE *cola = fopen("C:/Users/danie/Desktop/cola.txt", "r");
if (cola == NULL)
{
printf("Cannot open file \n");
exit(0);
}
while (1) {
t = fgetc(cola);
if (t == EOF)
break;
buff = t;
printf("%c", buff);
}
fclose(cola);
return 0;
}

How to fix segmentation fault: 11 compiler error

I have been learning from the C Programming Language book (K&R) and was writing one of the exercises that removes trailing blanks from an input. I understand that a segmentation fault is at some level a problem having to do with accessing memory that is not accessible, but I have read through this code several times and can't find the error. I would like it very much if someone could help find this error and tell me how to discover errors like this in the future.
#include <stdio.h>
#define MAXLINE 1000
#define CHAR 0 /*character definition*/
#define TRAIL 1 /*determines whether program is in a trailing blank*/
int getinput(char input[], int max);
int trailrem(char input[], char copyto[]);
int len;
int main() {
char line[MAXLINE]; /*current line*/
char newline[MAXLINE];
int i, c, newreturn; /*integer counter, character holder, current line length, and trailrem return value*/
int len;
while((len = getinput(line, MAXLINE)) > 0) {
newreturn = trailrem(line, newline);
for(i = 0; i <= newreturn; ++i)
printf("\n%c\n", newline[i]);
}
}
int getinput(char input[],int max) {
int i, c, line;
for(i = 0; (c = getchar()) != EOF && c != '\n' && c < (max-1); ++i)
input[i] = c;
if(c == '\n') {
input[i] = c;
++i;
}
input[i] = '\0';
return i;
}
int trailrem(char input[], char copy[]) {
int i, j, minusin, state, r;
for(i = len; input[i] != EOF && i >= 0; --i) {
if(input[i] =='\n')
state = TRAIL;
else if((input[i] == ' ' && state == TRAIL) ||( input[i] == '\t' && state == TRAIL))
++minusin;
else if(state == TRAIL && (input[i] != ' ' || input[i] != '\t'))
state = CHAR;
for(j = (r = len-minusin); state == CHAR; --j){
copy[j-2] = input[i];
}
}
copy[r] = '\0';
copy[r-1] = '\n';
return r;
}
So many problems in your code. But the main problem is, you have a global len
int len;
And a local len in the main function.
You are initializing len in main function like this:
while((len = getinput(line, MAXLINE)) > 0)
So the local len is updated. But the global len is still 0.
You are expecting that, you will get the updated value of len in trailrem method but you don't. In trailrem() you will get len equal to 0!
for(i = len; input[i] != EOF && i >= 0; --i)
So i is 0 too. And hence, copy[r-1] = '\n'; will crash, because r-1 can be negative.
Other problems: (BLUEPIXY and WhozCraig mentioned in the comment).
for(i = 0; (c = getchar()) != EOF && c != '\n' && c < (max-1); ++i)
here, c < (max-1) should be i < (max-1).
++minusin; in trailrem function where minusin is uninitialized.

Resources