Counting the mose frequent char in a file - c

For my CS class I need to write a program that reads an entire file. I've researched a whole bunch of different ways to do this with a string (the two for loops inside the while loops) and I've combined it with the way I was taught to read through a whole file. The problem is you can't index the frequency list with a char variable type (line). Is there an easier way to read through the file and do this?
# define MAX 200
void replace_most_freq(const char *filename, char c, FILE *destination) {
// your code here
FILE *in_file = NULL;
in_file = fopen(filename, "r");
if (!in_file) {
fprintf(destination,
"Error(replace_most_freq): Could not open file %s\n", filename);
fclose(in_file);
return;
}
int i, max = -1, len;
int freq[256] = { 0 };
char line[MAX], result;
while (fgets(line, sizeof(line), in_file)) {
len = strlen(line);
for (i = 0; i < len; i++) {
freq[line[i]]++;
}
}
while (fgets(line, sizeof(line), in_file)) {
len = strlen(line);
for (i = 0; i < len; i++) {
if (max < freq[line[i]]) {
max = freq[line[i]];
result = line[i];
}
}
}
printf("Most frequent char = %c\n", result);
return;
}

Your initial loop is almost correct: you should convert the char to an unsigned char to avoid undefined behavior on negative char values on platforms where char is signed.
The second loop is incorrect: there is no need to read from the file, just iterate over the freq array to find the largest count.
Here is a modified version:
#include <limits.h>
#include <stdio.h>
void replace_most_freq(const char *filename, char newc, FILE *destination) {
FILE *in_file = fopen(filename, "r");
if (!in_file) {
fprintf(stderr,
"Error(replace_most_freq): Could not open file %s\n", filename);
return;
}
int c, max, maxc;
int freq[UCHAR_MAX] = { 0 };
while ((c = getc(in_file)) != EOF) {
freq[c]++;
}
max = freq[maxc = 0];
for (c = 1; c < UCHAR_MAX; c++) {
if (max < freq[c])
max = freq[maxc = c];
}
printf("Most frequent char = %c (%d)\n", max, max);
rewind(in_file);
while ((c = getc(in_file)) != EOF) {
if (c == maxc)
c = newc;
putc(c, destination);
}
}

You can read file in much larger chunks:
#define BUFFSIZE (4*1024*1024)
int findMax(const size_t *, size_t);
int replace_most_freq(const char *filename, char c, FILE *destination) {
int result = 1;
FILE *fi ;
size_t freq[256] = { 0 };
size_t dataChunkLength;
long fileLength;
unsigned char *databuff = malloc(BUFFSIZE);
if(!databuff)
{
result = -2;
goto function_exit;
}
fi = fopen(filename, "r");
if (!fi)
{
result = -1;
goto function_exit;
}
if (fseek(fi, 0, SEEK_END) == -1)
{
result = -3;
goto function_exit;
}
fileLength = ftell(fi);
if (fileLength == -1)
{
result = -4;
goto function_exit;
}
if (fseek(fi, 0, SEEK_SET) == -1)
{
result = -3;
goto function_exit;
}
while(fileLength)
{
if(fileLength <= BUFFSIZE) dataChunkLength = fileLength;
else dataChunkLength = BUFFSIZE;
size_t bytesRead = fread(databuff, 1, dataChunkLength, fi);
if(bytesRead != dataChunkLength)
{
if(feof(fi) || ferror(fi))
{
result = -4;
goto function_exit;
}
}
for(size_t index = 0; index < bytesRead; index++)
{
freq[databuff[index]]++;
}
fileLength -= bytesRead;
}
int mostFrequent;
printf("The most freq char is 0x%02x\n", mostFrequent = findMax(freq, 256));
function_exit:
free(databuff);
if (fi) fclose(fi);
return result;
}

Related

How to get the length of the longest Line in a File in C

this is my first Question here so im grateful for every kind of Help.
Im trying to get the length of the longest Line in a File, so i can later calloc it and read the whole File in. My first attempt was Dynamic, but it didnt work.
My Code till now is:
FILE *inputData;
inputData = fopen("input.txt", "r");
char *input = NULL;
int longestLinelength = 0;
while(fscanf(inputData,"%[^\n]", input) != EOF) {
if(longestLineLength<strlen(input)){
longestLineLength=strlen(input);
}
}
fclose()
This code unfortunetly leads to a memory access error.
size_t longestLine(FILE *fi)
{
size_t largest = 0, current = 0;
int ch;
if(fi)
{
while((ch = fgetc(fi)) != EOF)
{
if(ch == '\n')
{
if(current > largest) largest = current;
current = 0;
}
else
{
current++;
}
}
if(current > largest) largest = current;
}
return largest;
}
I think the problem is not with realloc, but with a misunderstanding of how things work.
It would be best to read carefully what scanf does. And how pointers work.
input is a NULL pointer and you want to write to it, this causes a crash in the application. scanf needs allocated memory to write to, it does not allocate it itself. Generally I would suggest to use fgets instead of scanf as it is better to handle. The formating options of scanf can be done after you read it with fgets.
Probably this help this is based on the book The C Programming Language.
First we need a main function to get the lines in the file
int get_file_line(char line[], int maxline, FILE *fptr) {
int ch, i;
for (i = 0; i < (maxline - 1) && ((ch = getc(fptr)) != EOF) && (ch != '\n'); ++i) {
line[i] = ch;
}
if (ch == '\n') {
line[i] = ch;
++i;
}
line[i] = '\0';
return i;
}
Then we will store the data into a new array of chars
void copy(char to[], char from[]) {
int i = 0;
while (from[i] != '\0') {
to[i] = from[i];
i++;
}
}
And finally in the main function we gonna open the file and use the previous functions
FILE *ptr;
const char *file_name = "your_file.txt";
ptr = fopen(file_name, "r");
while ((len = get_file_line(line, MAXLINE, ptr)) > 0) {
if (len > max) {
max = len;
copy(longest, line);
}
}
fclose(ptr);
if (max > 0) {
printf("longest: %s\n", longest);
printf("len : %d\n", max);
}
All together
#include <stdio.h>
#define MAXLINE 1000
int get_file_line(char line[], int maxline, FILE *fptr) {
int ch, i;
for (i = 0; i < (maxline - 1) && ((ch = getc(fptr)) != EOF) && (ch != '\n'); ++i) {
line[i] = ch;
}
if (ch == '\n') {
line[i] = ch;
++i;
}
line[i] = '\0';
return i;
}
void copy(char to[], char from[]) {
int i = 0;
while (from[i] != '\0') {
to[i] = from[i];
i++;
}
}
int main() {
int len, max = 0;
char line[MAXLINE];
char longest[MAXLINE];
FILE *ptr;
const char *file_name = "your_file.txt";
ptr = fopen(file_name, "r");
while ((len = get_file_line(line, MAXLINE, ptr)) > 0) {
if (len > max) {
max = len;
copy(longest, line);
}
}
fclose(ptr);
if (max > 0) {
printf("longest: %s\n", longest);
printf("len : %d\n", max);
}
return 0;
}
I hope this was helpful
#include <stdio.h>
#include <string.h>
#define MAX_LINE_LENGTH 4096
static void process_file(char *filename);
int main(int argc, char **argv) {
int q;
if(argc <= 1) {
printf("Usage: %s <files>\n", argv[0]);
return 1;
}
for(q = 1; q < argc; q++) {
process_file(argv[q]);
}
return 0;
}
void process_file(char *filename) {
char buf[MAX_LINE_LENGTH] = {0};
FILE *file;
char line_val[MAX_LINE_LENGTH] = {0};
int line_len = -1;
int line_num = -1;
int cur_line = 1;
file = fopen(filename, "r");
if(file == NULL) {
return;
}
while(fgets(buf, MAX_LINE_LENGTH, file) != NULL) {
int len_tmp = strlen(buf) - 1;
if(buf[len_tmp] == '\n')
buf[len_tmp] = '\0';
if(line_len < len_tmp) {
strncpy(line_val, buf, len_tmp + 1);
line_len = len_tmp;
line_num = cur_line;
}
cur_line++;
/*printf("%s", buf);*/
}
fclose(file);
if(line_num < 1) {
return;
}
printf("%d:%s:%d:%s\n", line_len, filename, line_num, line_val);
}

How to loop a nested array in C

I've been developing a guessing game in which the goal is to guess the character selected by the user among specific characters, anyway, my first and only idea is to create an array with the questions to be asked, and each question has its options like in the code below I'm a newbie in C language so that I there are several things which I'm not sure how to handle. In short, I'd like to know how can I loop over the array showing to the user the questions with its questions to be answered? Here's the code.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define ROW 500
#define LINE 200
//Read file and append to an array buffer
char *characters(){
char *source = NULL;
FILE *fp = fopen("file.txt", "r");
if (fp != NULL) {
/* Go to the end of the file. */
if (fseek(fp, 0L, SEEK_END) == 0) {
/* Get the size of the file. */
long bufsize = ftell(fp);
if (bufsize == -1) { /* Error */ }
/* Allocate our buffer to that size. */
source = malloc(sizeof(char) * (bufsize + 1));
/* Go back to the start of the file. */
if (fseek(fp, 0L, SEEK_SET) != 0) { /* Error */ }
/* Read the entire file into memory. */
size_t newLen = fread(source, sizeof(char), bufsize, fp);
if ( ferror( fp ) != 0 ) {
fputs("Error reading file", stderr);
} else {
source[newLen++] = '\0'; /* Just to be safe. */
}
}
fclose(fp);
}
return source;
}
char *strndup(const char *s, size_t n) {
char *p;
size_t n1;
for (n1 = 0; n1 < n && s[n1] != '\0'; n1++)
continue;
p = malloc(n + 1);
if (p != NULL) {
memcpy(p, s, n1);
p[n1] = '\0';
}
return p;
}
// User input
char *input(){
char *value;
char buffer[10];
int j = 0;
while( j < 1 && fgets(buffer, 10, stdin) != NULL){
value = strndup(buffer, 10);
j++;
}
return value;
}
// Main function
int main (void)
{
char *questions[] = {
"Genre",{"male","female"},
"Hair", {"black","red","blond"},
"Cloths",{"dress","shirt","pants"},
"pet", {"dog","cat","pig"}
};
int asked[4] = {0};
char *answers[5];
char buffer[6];
srand(time(NULL));
for (int i = 0; i < 4; i++) {
int q = rand() % 4;
while (asked[q])
q = rand() % 4;
asked[q]++;
printf ("%s\n", questions[q]);
answers[i] = input();
}
for(int i = 0; i < 4; i++)
{
printf(" %s ",answers[i]);
}
return 0;
}
That's the file's structure I'll compare as long as I have all the answers from the user.
female,blond,vestido,pig,character b
male,black,shirt,pants,dog,character c
male,black,shirt,pants,cat,character d
female,blond,dress,cat,character A
male,red,shirt,pants,pig,character e

Reading a file line-by-line into an array of strings in C

I'm trying to read the following file line by line into an array of strings where each line is an element of the array:
AATGC
ATGCC
GCCGT
CGTAC
GTACG
TACGT
ACGTA
CGTAC
GTACG
TACGA
ACGAA
My code is as follows:
void **get_genome(char *filename) {
FILE *file = fopen(filename, "r");
int c;
int line_count = 0;
int line_length = 0;
for (c = getc(file); c != EOF; c = getc(file)) {
if (c == '\n') line_count++;
else line_length++;
}
line_length /= line_count;
rewind(file);
char **genome = calloc(line_length * line_count, sizeof(char));
for (int i = 0; i < line_count; i++) {
genome[i] = calloc(line_length, sizeof(char));
fscanf(file, "%s\n", genome[i]);
}
printf("%d lines of %d length\n", line_count, line_length);
for (int i = 0; i < line_count; i++)
printf("%s\n", genome[i]);
}
However, for some reason I get garbage output for the first 2 elements of the array. The following is my output:
`NP��
�NP��
GCCGT
CGTAC
GTACG
TACGT
ACGTA
CGTAC
GTACG
TACGA
ACGAA
You seem to assume that all lines have the same line length. If such is the case, you still have some problems:
the memory for the row pointers is allocated incorrectly, it should be
char **genome = calloc(line_count, sizeof(char *));
or better and less error prone:
char **genome = calloc(line_count, sizeof(*genome));
the memory for each row should be one byte longer the the null terminator.
\n is the fscanf() format string matches any sequence of whitespace characters. It is redundant as %s skips those anyway.
it is safer to count items separated by white space to avoid miscounting the items if the file contains any blank characters.
you do not close file.
you do not return the genome at the end of the function
you do not check for errors.
Here is a modified version:
void **get_genome(const char *filename) {
FILE *file = fopen(filename, "r");
if (file == NULL)
return NULL;
int line_count = 1;
int item_count = 0;
int item_length = -1;
int length = 0;
int c;
while ((c = getc(file)) != EOF) {
if (isspace(c)) {
if (length == 0)
continue; // ignore subsequent whitespace
item_count++;
if (item_length < 0) {
item_length = length;
} else
if (item_length != length) {
printf("inconsistent item length on line %d\", line_count);
fclose(file);
return NULL;
}
length = 0;
} else {
length++;
}
}
if (length) {
printf("line %d truncated\n", line_count);
fclose(file);
return NULL;
}
rewind(file);
char **genome = calloc(item_count, sizeof(*genome));
if (genome == NULL) {
printf("out of memory\n");
fclose(file);
return NULL;
}
for (int i = 0; i < item_count; i++) {
genome[i] = calloc(item_length + 1, sizeof(*genome[i]));
if (genome[i] == NULL) {
while (i > 0) {
free(genome[i]);
}
free(genome);
printf("out of memory\n");
fclose(file);
return NULL;
}
fscanf(file, "%s", genome[i]);
}
fclose(file);
printf("%d items of %d length on %d lines\n",
item_count, item_length, line_count);
for (int i = 0; i < item_count; i++)
printf("%s\n", genome[i]);
return genome;
}
char **genome = calloc(line_length * line_count, sizeof(char));
must be
char **genome = calloc(line_count, sizeof(char*));
or more 'secure'
char **genome = calloc(line_count, sizeof(*genome));
in case you change the type of genome
else the allocated block if not enough long if you are in 64b because line_count is 5 rather than 8, so you write out of it with an undefined behavior
You also need to return genome at the end of the function
It was also possible to not count the number of lines and to use realloc to increment your array when reading the file
As I see the lines have the same length. Your function should inform the caller how many lines have been read. There is no need of reading the file twice. There is no need of calloc (which is more expensive function). Always check the result of the memory allocation functions.
Here is a bit different version of the function:
char **get_genome(char *filename, size_t *line_count) {
FILE *file = fopen(filename, "r");
int c;
size_t line_length = 0;
char **genome = NULL, **tmp;
*line_count = 0;
if(file)
{
while(1)
{
c = getc(file);
if( c == EOF || c == '\n') break;
line_length++;
}
rewind(file);
while(1)
{
char *line = malloc(line_length + 1);
if(line)
{
if(!fgets(line, line_length + 1, file))
{
free(line);
break;
}
line[line_length] = 0;
tmp = realloc(genome, (*line_count + 1) * sizeof(*genome));
if(tmp)
{
genome = tmp;
genome[*line_count] = line;
*line_count += 1;
}
else
{
// do some memory free magic
}
}
}
fclose(file);
}
return genome;
}

Reading a file and saving the same content without the comments in the file in a new file

my task is to remove all the comments from a .c file and save the content in another .o file.
Given file:
// Sums two integers.
// Parameters: a, the first integer; b the second integer.
// Returns: the sum.
int add(int a, int b)
{
return a + b; // An inline comment.
}
Should look like:
int add(int a, int b)
{
return a + b;
}
I have been trying this multiple times and I reached this state:
#include <stdio.h>
int main(int argc, char **argv)
{
FILE * fPtr;
fPtr = fopen("test.o", "w");
char line[300];
FILE *file = fopen("math_functions.c", "r");
if (file == NULL) {
printf("Error: Could not open %s!\n", "math_functions.c");
return -1;
}
else {
while(fgets(line, 300, file)) {
int len = strlen(line);
char helperLineArray[300];
for (int i = 1; i < len; i++) {
if (line[i] == '/' && line[i-1] == '/') {
break;
}
else
{
helperLineArray[i-1] = line[i-1];
}
}
fputs(helperLineArray, fPtr);
}
}
return 0;
}
Thank you in advance!
I think, this would help
FILE * fPtr;
fPtr = fopen("test.o", "w");
char line[256],helperLineArray[10000];
FILE *file = fopen("math_functions.c", "r");
int j=0;
while (fgets(line, sizeof(line), file)){
int len = strlen(line);
for (int i = 0; i < len-1; i++) {
if (line[i] == '/' && line[i+1] == '/') {
break;
}
else
{
helperLineArray[j++] = line[i];
}
}
helperLineArray[j++] = '\n';
}
fputs(helperLineArray, fPtr);
return 0;

Find longest comment line in a file in C

So I have this function to find the longest line in a file:
int LongestLine(FILE *filename) {
char buf[MAX_LINE_LENGTH] = {0};
char line_val[MAX_LINE_LENGTH] = {0};
int line_len = -1;
int line_num = -1;
int cur_line = 1;
filename = fopen(filename, "r");
while(fgets(buf, MAX_LINE_LENGTH, filename) != NULL) {
int len_tmp = strlen(buf) - 1;
if(buf[len_tmp] == '\n')
buf[len_tmp] = '\0';
if(line_len < len_tmp) {
strncpy(line_val, buf, len_tmp + 1);
line_len = len_tmp;
line_num = cur_line;
}
cur_line++;
}
return line_num;
}
and I was thinking of combining it with this one:
bool startsWith(const char *pre, const char *str)
{
size_t lenpre = strlen(pre),
lenstr = strlen(str);
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
}
But.. however, the LongestLine() function returns an integer. So how can I use both functions so that I may find the longest line starting with let's say //?
Add a call to startsWith (to see if it is a comment) in your if statement to decide if a line is the new longest:
if( startsWith("//",buf) && (line_len < len_tmp) ) {

Resources