Calculating entropy in C - c

I'm trying to find the entropy of any given file. However, when I run my program, it always gives 3.00000 as an answer. I haven't used C in awhile, but I'm not sure where I'm going wrong here. I've been fiddling with it for a few hours now. Any tips would be great, thank you!
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define SIZE 256
int entropy_calc(long byte_count[], int length)
{
float entropy;
float count;
int i;
/* entropy calculation */
for (i = 0; i < SIZE; i++)
{
if (byte_count[i] != 0)
{
count = (float) byte_count[i] / (float) length;
entropy += -count * log2f(count);
}
}
return entropy;
}
int main(int argc, char **argv)
{
FILE *inFile;
int i;
int j;
int n; // Bytes read by fread;
int length; // length of file
float count;
float entropy;
long byte_count[SIZE];
unsigned char buffer[1024];
/* do this for all files */
for(j = 1; j < argc; j++)
{
memset(byte_count, 0, sizeof(long) * SIZE);
inFile = fopen(argv[j], "rb"); // opens the file given on command line
if(inFile == NULL) // error-checking to see if file exists
{
printf("Files does not exist. `%s`\n", argv[j]);
continue;
}
/* Read the whole file in parts of 1024 */
while((n = fread(buffer, 1, 1024, inFile)) != 0)
{
/* Add the buffer to the byte_count */
for (i = 0; i < n; i++)
{
byte_count[(int) buffer[i]]++;
length++;
}
}
fclose(inFile);
float entropy = entropy_calc(byte_count, length);
printf("%02.5f \t%s\n", entropy, argv[j]);
}
return 0;
}

Your return type of the function entropy_calc() should be float not int.

Related

Why do I get a segmentation fault here

Excuse me for the sloppy code, I am still a beginner. But after putting a long time into this programming question I got from my Uni I don't know where to turn.
The question itself is: we need to read from the "staedte.csv" (which displays the population and cities of German states) and then return an array of strings with the strings formatted like this: The city ***** has a population of ****.
You are supposed to pass in 2 arguments into the cli: the number 100 and the state you want to check the cities and population for: example Bayern (Bavaria).
My plan was to make a 2d array. First I would dynamically allocate the memory for the first one by making a for loop and iterating over the original csv to check how many states in the csv = the state from the arguments. Then I would make a dynamic array using the amount of states in the csv matching. Then I would iterate (with for loop) over the list of matching states and then first check the length of the formatted string then: The city ***** has a population of ****., then allocate that memory and store the pointer to that info in the previously created array. Then I try to print the first item of that array, meaning the pointer.
I checked and there are 8 elements in the csv with Bavaria as their state, but in the for loop
for (j = 0; j < 8; j++)
if j is larger than 4 then I Get a segmentation fault even though the space is supposed to be allocated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "input3.h"
/* Die Konstanten:
* int MAX_LAENGE_STR - die maximale String Länge
* int MAX_LAENGE_ARR - die maximale Array Länge
* sind input3.c auf jeweils 255 und 100 definiert
*/
int main(int argc, char **argv)
{
if (argc < 3)
{
printf("Aufruf: %s <anzahl> <bundesland>\n", argv[0]);
printf("Beispiel: %s 100 Bayern\n", argv[0]);
printf("Klein-/Großschreibung beachten!\n");
exit(1);
}
// int anzahl = atoi(argv[1]);
char *bundesland = argv[2];
// Statisch allokierter Speicher
char staedte[MAX_LAENGE_ARR][MAX_LAENGE_STR];
char laender[MAX_LAENGE_ARR][MAX_LAENGE_STR];
int bewohner[MAX_LAENGE_ARR];
read_file("staedte.csv", staedte, laender, bewohner);
// printf("%s %s", bundesland, laender[5]);
int CityCounter = 0;
int CopyCounter = 0;
int *CityArray;
CityArray = (int *)malloc(0);
for (int i = 0; i < MAX_LAENGE_ARR; i++)
{
if (strncmp(laender[i], bundesland, strnlen(bundesland, 10)) == 0)
{
CityArray = realloc(CityArray, sizeof(CityArray) + sizeof(int) * 1);
CityArray[CityCounter] = i;
CityCounter++;
}
}
// printf("%d", CityCounter);
char **string = (char **)malloc(CityCounter * sizeof(int));
int j;
printf("%d", (int)sizeof(CityArray));
int numOfCities = (int)sizeof(CityArray);
for (j = 0; j < 8; j++)
{
char buffer[100];
size_t size = snprintf(buffer, 50, "Die Stadt %s hat %d Einwohner.\n", staedte[CityArray[j]], bewohner[CityArray[j]]);
string[j] = malloc(sizeof(char) * size);
// string[j][size] = "\0";
strncpy(string[j], buffer, size);
}
// printf("%s", string[2]);
for (int i = 0; i < numOfCities; i++)
{
printf("%s", string[i]);
}
// write_file(string, sizeof(string));
free(string);
}
this is the code I wrote.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include "input3.h"
int MAX_LAENGE_STR = 255;
int MAX_LAENGE_ARR = 100;
void write_file(char *result[], int len)
{
FILE *fp = fopen("resultat.txt", "w");
if (fp == NULL)
{
perror("resultat.txt");
exit(1);
}
for (int i = 0; i < len; i++)
{
fprintf(fp, "%s\n", result[i]);
}
fclose(fp);
}
int read_file(char *dateiname, char staedte[][MAX_LAENGE_STR], char laender[][MAX_LAENGE_STR], int bewohner[])
{
FILE *fp = fopen(dateiname, "r");
if (fp == NULL)
{
perror(dateiname);
exit(1);
}
char stadt[MAX_LAENGE_STR];
char land[MAX_LAENGE_STR];
int anzahl;
int i = 0;
int len;
while (fscanf(fp, "\"%[^\"]\";\"%[^\"]\";%d\n", stadt, land, &anzahl) != EOF)
{
if (i >= MAX_LAENGE_ARR)
{
printf("ERROR: Die Datei ist größer als erwartet!");
return i;
}
len = strlen(stadt) + 1;
strncpy(staedte[i], stadt, len - 1);
staedte[i][len - 1] = '\0';
len = strlen(land) + 1;
strncpy(laender[i], land, len - 1);
laender[i][len - 1] = '\0';
bewohner[i] = anzahl;
i++;
}
fclose(fp);
return i;
}
extern int MAX_LAENGE_ARR;
extern int MAX_LAENGE_STR;
void write_file(char *result[], int len);
int read_file(char *dateiname, char staedte[][MAX_LAENGE_STR], char laender[][MAX_LAENGE_STR], int bewohner []);
This code was supplied by our Uni but it should be correct.
So I changed the
char **string = (char **)malloc(CityCounter * sizeof(int)); to char
**string = (char *)malloc(CityCounter * sizeof(char)); and now I don't get null.
But if I once again change the
for (j = 0; j < 4; j++) {}
and modify the j larger then 4 then I get a bus error

Sort ints from a txt file

I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}

Printing randomly to the screen in C

I am trying to print to the screen randomly in C. I am using random and time to generate random index and printing it but It definetely is not the way to do it. How do I print every element randomly to the screen in c?
Here is the code I have so far.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define leng 128
#define arr 10
int main(void)
{
char line[arr][leng];
char fname[20];
FILE *fptr = NULL;
int i = 0;
int tot = 0;
scanf("%s",fname);
fptr = fopen(fname, "r");
while(fgets(line[i], leng, fptr))
{
line[i][strlen(line[i]) - 1] = '\0';
i++;
}
srand(time(0));
for(int i = 0; i < 6; ++i)
{
printf("%s\n", line[rand()%10]);
}
return 0;
}
My random text file has 6 lines of code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define LEN 128 //Don't mind the capital, personal preference for defines
#define ARR 6 //Make this match the number of lines in your file
int main(void)
{
char line[ARR][LEN];
char fname[20];
int test[ARR] = {0}; //Added an array to test if index already created
int ind[ARR] = {0}; //Array to store index created randomly
int ind_done = 0; //Counter for index array
FILE *fptr = NULL;
int i = 0;
int tot = 0;
scanf("%s",fname);
fptr = fopen(fname, "r");
while(fgets(line[i], LEN, fptr))
{
line[i][strlen(line[i]) - 1] = '\0';
i++;
}
srand(time(0));
for(int i = 0; i < ARR; ++i)
{
printf("%s\n", line[rand()%ARR]);
}
//Keep creating indexes using rand till all unique indexes are created
while (ind_done<ARR) {
int i = rand()%ARR;
if (test[i] == 0) {
ind[ind_done] = i;
ind_done++;
test[i] = 1;
}
}
for(int i = 0; i < ARR; ++i)
{
printf("%s\n", line[ind[i]]);
}
return 0;
}

Utils.cpp function and getting prefilled array from text file

This file needs to read a text file with numbers on each line, and preload the values from the text file into myArray. It also needs to read the integer for arraySize, so that it can be used to calculate the mean and median. The code is in c, but the teacher uses c++ file names and doesn't want us t change it. I also want to total the numbers, but when executed it doesn't print anything.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "Utils.cpp"
int main(void)
{
//declare variables
int myArray[]={};
// declare a string to hold the name of the file
// that will be read into your statsArray
char statsFile[] = "2005.txt"; /* file to be read in */
int arraySize = getFileSize(statsFile); /* determine size of file */
//dynamically declare statsArray based on size of file
int *statsArray = (int*)malloc(sizeof(int)*arraySize);
// initialize arrays with zero values
initializeArray(myArray, arraySize);
//load array from file
loadArray(myArray, arraySize, statsFile);
// sum stats array
int total=0;
for(int x=0;x<myArray[arraySize-1];x++){
total+=myArray[x]; //adds each element of array to total
}
printf("%i",total);
Here is the utils.cpp that was provided.
#include <stdio.h>
int getFileSize(char fileName[])
{
FILE *fr; /* declare the file pointer */
int c; /*Nb int (not char) for the EOF */
int newlineCount = 0;
// open the file to count the lines
fr = fopen(fileName, "rt");
// count the newline characters
while ( (c=fgetc(fr)) != EOF)
{
if (c =='\n')
newlineCount++;
}
return newlineCount;
}
void loadArray(int list[], int size, char fileName[])
{
FILE *fr; // declare the file pointer
int count = 0; // running line count
char line[80];
fr = fopen(fileName, "rt");
while(fgets(line, 80, fr) != NULL)
{
// get a line, p to 80 chars fr. done if NULL
sscanf(line, "%i", &list[count]);
count++;
}
}
void initializeArray(int list[], int size)
{
for (int i = 0; i < size; i++)
{
list[i] = 0;
}
}
void sortArray(int list[], int size)
{
int tempNumber;
for (int i = 0; i < size; i++)
{
for (int j = i +1; j < size; j++)
{
if(list[i] > list[j])
{
tempNumber = list[i];
list[i] = list[j];
list[j] = tempNumber;
}
}
}
}

Segmentation fault whilst reading strings from a text file into a struct

Im trying to read a file into a struct which contains a char array as shown in the code below, however it gives an output of segmentation fault: 11. I have tried everything including using similar examples but it has done my head in.
My code is as below:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LENGTH 1024
struct Raw_Word{
char word[MAX_LENGTH];
char filename [25];
int length;
};
struct Final_Word{
char word[MAX_LENGTH];
int length;
int amount;
};
struct Raw_Word raw_word[MAX_LENGTH];
int main(int argc, char* argv[]) {
if (argc > 10) {
printf("Maximum of 10 files allowed");
return 1;
}
int i = 0;
int lines = 0;
for (i = 1; i <= argc; i++) {
FILE *fp = fopen(argv[i], "r");
while(fgets(raw_word[lines].word, MAX_LENGTH, fp)){
//printf("%s", raw_word[lines].word);
}
fclose(fp);
}
for(int j = 0; j < lines; j++){
printf("%s\n", raw_word[j].word);
}
return 0;
}
The line
for (i = 1; i <= argc; i++)
is not right. You need to stop the index at argc-1.
for (i = 1; i < argc; i++)
or
for (i = 1; i != argc; i++)

Resources