Sort ints from a txt file - c

I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}

Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.

To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}

Related

Why do I get a segmentation fault here

Excuse me for the sloppy code, I am still a beginner. But after putting a long time into this programming question I got from my Uni I don't know where to turn.
The question itself is: we need to read from the "staedte.csv" (which displays the population and cities of German states) and then return an array of strings with the strings formatted like this: The city ***** has a population of ****.
You are supposed to pass in 2 arguments into the cli: the number 100 and the state you want to check the cities and population for: example Bayern (Bavaria).
My plan was to make a 2d array. First I would dynamically allocate the memory for the first one by making a for loop and iterating over the original csv to check how many states in the csv = the state from the arguments. Then I would make a dynamic array using the amount of states in the csv matching. Then I would iterate (with for loop) over the list of matching states and then first check the length of the formatted string then: The city ***** has a population of ****., then allocate that memory and store the pointer to that info in the previously created array. Then I try to print the first item of that array, meaning the pointer.
I checked and there are 8 elements in the csv with Bavaria as their state, but in the for loop
for (j = 0; j < 8; j++)
if j is larger than 4 then I Get a segmentation fault even though the space is supposed to be allocated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "input3.h"
/* Die Konstanten:
* int MAX_LAENGE_STR - die maximale String Länge
* int MAX_LAENGE_ARR - die maximale Array Länge
* sind input3.c auf jeweils 255 und 100 definiert
*/
int main(int argc, char **argv)
{
if (argc < 3)
{
printf("Aufruf: %s <anzahl> <bundesland>\n", argv[0]);
printf("Beispiel: %s 100 Bayern\n", argv[0]);
printf("Klein-/Großschreibung beachten!\n");
exit(1);
}
// int anzahl = atoi(argv[1]);
char *bundesland = argv[2];
// Statisch allokierter Speicher
char staedte[MAX_LAENGE_ARR][MAX_LAENGE_STR];
char laender[MAX_LAENGE_ARR][MAX_LAENGE_STR];
int bewohner[MAX_LAENGE_ARR];
read_file("staedte.csv", staedte, laender, bewohner);
// printf("%s %s", bundesland, laender[5]);
int CityCounter = 0;
int CopyCounter = 0;
int *CityArray;
CityArray = (int *)malloc(0);
for (int i = 0; i < MAX_LAENGE_ARR; i++)
{
if (strncmp(laender[i], bundesland, strnlen(bundesland, 10)) == 0)
{
CityArray = realloc(CityArray, sizeof(CityArray) + sizeof(int) * 1);
CityArray[CityCounter] = i;
CityCounter++;
}
}
// printf("%d", CityCounter);
char **string = (char **)malloc(CityCounter * sizeof(int));
int j;
printf("%d", (int)sizeof(CityArray));
int numOfCities = (int)sizeof(CityArray);
for (j = 0; j < 8; j++)
{
char buffer[100];
size_t size = snprintf(buffer, 50, "Die Stadt %s hat %d Einwohner.\n", staedte[CityArray[j]], bewohner[CityArray[j]]);
string[j] = malloc(sizeof(char) * size);
// string[j][size] = "\0";
strncpy(string[j], buffer, size);
}
// printf("%s", string[2]);
for (int i = 0; i < numOfCities; i++)
{
printf("%s", string[i]);
}
// write_file(string, sizeof(string));
free(string);
}
this is the code I wrote.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include "input3.h"
int MAX_LAENGE_STR = 255;
int MAX_LAENGE_ARR = 100;
void write_file(char *result[], int len)
{
FILE *fp = fopen("resultat.txt", "w");
if (fp == NULL)
{
perror("resultat.txt");
exit(1);
}
for (int i = 0; i < len; i++)
{
fprintf(fp, "%s\n", result[i]);
}
fclose(fp);
}
int read_file(char *dateiname, char staedte[][MAX_LAENGE_STR], char laender[][MAX_LAENGE_STR], int bewohner[])
{
FILE *fp = fopen(dateiname, "r");
if (fp == NULL)
{
perror(dateiname);
exit(1);
}
char stadt[MAX_LAENGE_STR];
char land[MAX_LAENGE_STR];
int anzahl;
int i = 0;
int len;
while (fscanf(fp, "\"%[^\"]\";\"%[^\"]\";%d\n", stadt, land, &anzahl) != EOF)
{
if (i >= MAX_LAENGE_ARR)
{
printf("ERROR: Die Datei ist größer als erwartet!");
return i;
}
len = strlen(stadt) + 1;
strncpy(staedte[i], stadt, len - 1);
staedte[i][len - 1] = '\0';
len = strlen(land) + 1;
strncpy(laender[i], land, len - 1);
laender[i][len - 1] = '\0';
bewohner[i] = anzahl;
i++;
}
fclose(fp);
return i;
}
extern int MAX_LAENGE_ARR;
extern int MAX_LAENGE_STR;
void write_file(char *result[], int len);
int read_file(char *dateiname, char staedte[][MAX_LAENGE_STR], char laender[][MAX_LAENGE_STR], int bewohner []);
This code was supplied by our Uni but it should be correct.
So I changed the
char **string = (char **)malloc(CityCounter * sizeof(int)); to char
**string = (char *)malloc(CityCounter * sizeof(char)); and now I don't get null.
But if I once again change the
for (j = 0; j < 4; j++) {}
and modify the j larger then 4 then I get a bus error

What am I doing wrong with malloc and realloc of array of struct?

I'm trying to build in C an array of structures without defining the length of the maximum size of the array.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef struct text {
char *final;
} text;
int main() {
int n, sizearray = 10, i;
char *str;
text *testo;
testo = (text *)malloc(sizeof(text) * sizearray);
fgets(str, 1024, stdin);
i = 0;
while (str[0] != 'q') {
if (i == sizearray - 1) {
testo = (text *)realloc(testo, sizearray * 2 * sizeof(text));
}
n = strlen(str);
n = n + 1;
testo[i].finale = (char *)malloc(sizeof(char) * n);
strcpy(testo[i].finale, str);
i++;
fgets(str, 1024, stdin);
}
for (i = 0; i < sizearray; i++)
printf("%s \n", testo[i].finale);
return 0;
}
this gives me
process finished with exit code 139 (interrupted by signal 11:SIGSEV).
What am I doing wrong?
There are multiple issues in your code:
[major] str is an uninitialized pointer. You should make it an array of char defined with char str[1024].
[major] you do not adjust sizearray when you double the size of the array, hence you will never reallocate the array after the initial attempt at i = 9.
[major] the final loop goes to sizearray but there are potentially many uninitialized entries at the end of the array. You should stop at the last entry stored into the array.
you should also check the return value of fgets() to avoid an infinite loop upon premature end of file.
you should test for potential memory allocation failures to avoid undefined behavior.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef struct text {
char *finale;
} text;
int main() {
char str[1024];
text *testo = NULL;
size_t sizearray = 0;
size_t i, n = 0;
while (fgets(str, sizeof str, stdin) && *str != 'q') {
if (n == sizearray) {
/* increase the size of the array by the golden ratio */
sizearray += sizearray / 2 + sizearray / 8 + 10;
testo = realloc(testo, sizearray * sizeof(text));
if (testo == NULL) {
fprintf(stderr, "out of memory\n");
return 1;
}
}
testo[n].finale = strdup(str);
if (testo[n].finale == NULL) {
fprintf(stderr, "out of memory\n");
return 1;
}
n++;
}
for (i = 0; i < n; i++) {
printf("%s", testo[i].finale);
}
for (i = 0; i < n; i++) {
free(testo[i].finale);
}
free(testo);
return 0;
}
str is uninitialized. Either allocate memory with malloc or define it as an array with char str[1024].

Print last few lines of a text file

Currently, I am trying to create a C program that prints the last few lines of a text file, read in through the command line. However, it is currently causing a segmentation error when I try to copy the strings from fgets into the main array. I have been unable to fix this, and so have not been able to test the rest of my code. How would I begin to fix the segmentation error? I have posted the code below:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
int i=1,j,printNumber;
char **arr = (char **) malloc (100 * sizeof(char *));
char *line = (char *) malloc (80 * sizeof(char));
if (argc == 1) {
printNumber = 10;
}
else {
printNumber = atoi(argv[1]);
}
while (fgets(line,80,stdin) != NULL) {
if (line != NULL) {
line[strlen(line)-1] = '\0';
strcpy(arr[i],line); //SEGMENTATION ERROR!!!!
}
else {
free(line);
strcpy(arr[i],NULL);
}
i++;
printf("%d ",i);
}
free(arr);
for (j = i-printNumber-1; j < i-1; j++) {
printf("%s ", arr[j]);
}
printf("\n");
return 0;
}
You are allocating space for arr, which is a pointer to a pointer to char, but not allocating any individual char * pointers within arr.
Since you allocated arr with the size of 100 * sizeof(char *), I assume you want 100 sub-entries in arr. Sure:
for(i = 0; i < 100; i++)
arr[i] = malloc(80 * sizeof(char));
Then, when you free arr:
for(i = 0; i < 100; i++)
free(arr[i]);
free(arr);
Note that it is good practice to always check malloc for failure (return value of NULL) and handle it, and to set pointers to NULL after freeing them once to avoid double-free bugs.
You don't always know the length of the longest line (not until you try to read) OR how many last lines you are expected to keep track of (but is given at runtime). Thus, both of these values need to be known before you allocate memory or delegated to a function that does it for you.
#include <stdio.h>
#include <stdlib.h>
struct Line {
char *line; // content
size_t storage_sz; // allocation size of line memory
ssize_t sz; // size of line, not including terminating null byte ('\0')
};
int main(int argc, char *argv[]) {
int max_lines = 10;
if (argc > 1) {
max_lines = atoi(argv[1]);
}
if (max_lines < 0) {
fprintf(stderr, "%s\n", "Sorry, no defined behaviour of negative values (yet)\n");
return EXIT_FAILURE;
}
// keep an extra slot for the last failed read at EOF
struct Line *lines = (struct Line *) calloc(max_lines + 1, sizeof(struct Line));
int end = 0;
int size = 0;
// only keep track of the last couple of lines
while ((lines[end].sz = getline(&lines[end].line, &lines[end].storage_sz, stdin)) != -1) {
end++;
if (end > max_lines) {
end = 0;
}
if (size < max_lines) {
size++;
}
}
// time to print them back
int first = end - size;
if (first < 0) {
first += size + 1;
}
for (int count = size; count; count--) {
// lines might contain null bytes we can't use printf("%s", lines[first].line);
fwrite(lines[first].line, lines[first].sz, 1u, stdout);
first++;
if (first > size) {
first = 0;
}
}
// clear up memory after use
for (int idx = 0; idx <= max_lines; idx++) {
free(lines[idx].line);
}
free(lines);
return EXIT_SUCCESS;
}

file size exceeds buffer size

I want to compare 2 files for identical lines: mytab2411.txt(15,017,210 bytes in size) and shadow.txt (569 bytes in size) but when I compiled this code and ran the program, I get a segmentation fault. I know that it's because the "mytab2411.txt" file exceeds the size of "char buf" but how do I go about solving this problem without overflowing the buffer?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("shadow.txt", "rt");
f2 = fopen("mytab2411.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}
Your method assumes that each line in the file is 1024 bytes long. In practice the lines can be up to 1024 bytes, but most lines are much shorter. Use strdup or malloc to allocate memory for each line based on line's length.
Store the lines in dynamically allocated arrays. This is about 15 MB of data and it should not be a problem unless there are resource limitations.
int main(void)
{
char buf[1024];
char **arr1 = NULL;
char **arr2 = NULL;
int size1 = 0;
int size2 = 0;
FILE * f1, *f2;
f1 = fopen("shadow.txt", "r");
f2 = fopen("mytab2411.txt", "r");
while(fgets(buf, 1024, f1))
{
size1++;
arr1 = realloc(arr1, sizeof(char*) * size1);
arr1[size1 - 1] = strdup(buf);
}
while(fgets(buf, 1024, f2))
{
size2++;
arr2 = realloc(arr2, sizeof(char*) * size2);
arr2[size2 - 1] = strdup(buf);
}
for(int i = 0; i < size1; i++)
for(int j = 0; j < size2; j++)
{
if(strcmp(arr1[i], arr2[j]) == 0)
printf("match %s\n", arr1[i]);
}
return 0;
}

how to search an element from a file in c

My code needs to do three things:
Read numbers from a file FILE1 into an array (dynamic)
Sort those numbers
Search for numbers input from a FILE2 in the sorted array.
.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
int main (int argc, char *argv[]) {
FILE *fp1 = fopen ("myFile1.txt", "r");
if (fp1 == NULL) {
printf ("cannot open this file");
exit (0);
}
FILE *fp2 = fopen ("test1.txt", "w");
if (fp2 == NULL) {
puts ("Not able to open this file");
exit (1);
}
int i = 0, num, j, k;
int *B = NULL;
int *C;
int a;
int size = 32;
B = malloc (sizeof (int) * size);
while (fscanf (fp1, "%d", &num) == 1) {
if (i < size) {
B[i] = num;
fprintf (fp2, "%d\r\n", num);
i++;
}
else {
C = malloc (sizeof (int) * 2 * size);
memcpy (C, B, size * sizeof (int));
free (B);
B = &C[0];
B[i] = num;
i++;
size = size * 2;
i++;
for (j = 0; j < size; ++j) {
for (k = j + 1; k < size; ++k) {
if (B[j] < B[k]) {
a = &B[j];
B[j] = B[k];
B[k] = a;
}
}
}
printf ("after sorting");
for (j = 0; j < size; ++j)
printf ("%d\n", B[j]);
}
}
return 0;
fclose (fp1); /* note this code is never reached */
fclose (fp2);
}
I successfully complete the first part of reading in the numbers from a file. But I am not able to understand how to sort these numbers.
I am trying to apply bubble sort, but it puts 0s in my array. How is my implementation incorrect?
& is the address-of operator. You pass it as a pointer. You need a = B[i], since a is an int.
Now you sort the numbers descending, if you want them to be ascending change the < to > in if (B[j] < B[k]).
Also you must always check whether malloc succeeded or not with e.g.:
if (!B) {
fprintf(stderr,"B alloc error");
exit(-1);
}
Also you might want to consider realloc.
In addition there is a built-in qsort in stdlib.h, which gives much better time than O(n^2).
Note: I haven't tested your file operations, since you said they work properly.

Resources