#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BS 12
void reverse(char * buffer, int size)
{
char tmp;
int i;
for(i = 0; i < size / 2; i++)
{
tmp = (char)buffer[i];
buffer[i] = buffer[size - i - 1];
buffer[size - i - 1] = tmp;
}
}
int compare_bin(char * buffer, char * buffer2, int size)
{
// because strncmp is only for string without \x00, so there must be a customized compare function
int i;
for(i = 0; i < size; i++)
{
if(buffer[i] != buffer2[i])
return 0;
}
return 1;
}
int main (const int argc, const char** argv)
{
if(argc != 3)
exit(-1);
int equal = 1;
char * buffer = malloc(BS), * buffer2 = malloc(BS);
FILE * f1, * f2;
f1 = fopen(argv[1], "r");
f2 = fopen(argv[2], "r");
fseek(f1, 0, SEEK_END);
fseek(f2, 0, SEEK_END);
long i = ftell(f1), j = ftell(f2);
if(i != j)
{
equal = 0;
goto endp;
}
fseek(f2, 0, SEEK_SET);
int need = 0;
int count;
int f2_pos = 0;
do
{
i = i - BS;
if(i < 0)
{
need = BS - abs((int)i);
i = 0;
}
else
need = BS;
fseek(f1, i, SEEK_SET);
count = fread(buffer, need, 1, f1);
reverse(buffer, count * need);
// fwrite(buffer, count * need, 1, f2);
fread(buffer2, need * need, 1, f2);
// printf("compare...\n");
// for(int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer[i]);
// }
// printf("\n");
// for (int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer2[i]);
// }
// printf("\n");
if(compare_bin(buffer, buffer2, need * count) == 0)
{
equal = 0;
break;
}
f2_pos += need * count;
fseek(f2, f2_pos, SEEK_SET);
if(i == 0)
break;
}while(i > 0);
fclose(f1);
fclose(f2);
free(buffer);
free(buffer2);
endp:
if(equal)
return 0;
else
{
printf("2 files not equal is reversed order\n");
return 1;
}
return 0;
}
So I write a program to compare file content in reverse order. I have already considered \x00 in binary file and strncmp isn't used. But there is still flaw. There is a test server to test this program. But I dont have access to it. This program always fails on that server. So there must be some special cases to make it fail. Any idea?
There are other ways around it. For instance, calculating MD5. But I want to fix this.
For the very first iteration where you read data you have
fread(buffer2, need * need, 1, f2);
The problem is that in that case need is 12, which is the size of the memory allocated for buffer2, but you ask to read 12 * 12 bytes.
If the second file is large enough, you will write out of bounds in the memory, leading to undefined behavior. If the file is not large enough then you won't read anything.
Also note that the order of the two middle arguments to fread matter. If you changed the order you would write out of bounds of the buffer both if the file is larger than need * need or not. You should really read count byte-sized object (the second argument should be 1 and the third should be count, which of course mean you need to change the order in the first call as well).
In short, your two fread calls should be
count = fread(buffer, 1, BS, f1);
fread(buffer2, 1, count, f2);
PS. Don't forget error checking.
Related
I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}
So here is the program to reverse content of a file block by block.
#include <stdio.h>
#include <stdlib.h>
#define BS 12
void reverse(char * buffer, int size)
{
char tmp;
int i;
for(i = 0; i < size / 2; i++)
{
tmp = (char)buffer[i];
buffer[i] = buffer[size - i - 1];
buffer[size - i - 1] = tmp;
}
}
int main (const int argc, const char** argv)
{
if(argc != 3)
exit(-1);
char * buffer = malloc(BS);
FILE * f1, * f2;
f1 = fopen(argv[1], "r");
f2 = fopen(argv[2], "w");
fseek(f1, 0, SEEK_END);
long i = ftell(f1);
// long f1_len = ftell(f1);
// unsigned char tmp;
int if_end = 1;
int need = 0;
int count;
do
{
i = i - BS;
if(i < 0)
{
need = BS - abs(i);
i = 0;
}
else
need = BS;
fseek(f1, i, SEEK_SET);
if(if_end) // strip EOF
{
count = fread(buffer, need - 1, 1, f1);
if_end = 0;
}
else
count = fread(buffer, need, 1, f1);
reverse(buffer, count);
fwrite(buffer, count, 1, f2);
if(i == 0)
break;
}while(i > 0);
fclose(f1);
fclose(f2);
free(buffer);
return 0;
}
testfile:
$ xxd testfile
0000000: 6162 6364 6566 670a abcdefg.
$ gcc test.c -o test
$ ./test testfile testfile2
$ xxd testfile2
0000000: 61 a
Any idea where is wrong? I have been debugging this for long time.
Your problem is that fwrite returns the number of successful blocks, not the number of bytes.
So reverse(buffer, count); needs to be reverse(buffer,count * need)
Similairly the write to the output needs to be fwrite(buffer, count * need, 1, f2);
I want to know what is the best option to read a txt file that contain two line of numbers using gets function in c and save them in an array within 1 second.
Assume the following example as an txt file called ooo.txt and it has the number 2.000.000 in the first line (which will be the size of the array) and 2.000.000 number in the second line that will be stored in the array.
Eg
2000000
59 595 45 492 89289 5 8959 (+1.999.993 numbers)
code i try (only the fcanf function)
int t_size;
fscanf(fp, "%d",&t_size); //bypass the first character!
int* my_array = NULL;
my_array = malloc(t_size*sizeof(*my_array));
if (my_array==NULL) {
printf("Error allocating memory!\n"); //print an error message
return 1; //return with failure
getchar();
}
int i =0;
for ( i = 0; i < t_size; i++ )
{
fscanf(fp, "%d",&my_array[i]); /*p[i] is the content of element at index i and &p[i] is the address of element at index i */
}
best, so far, code to make the procedure in 1 second
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
int is_end(char* input) {
return *input == 0;
}
int is_linebreak(char* input) {
return *input == '\r' || *input == '\n' || *input == ' ';
}
char* eat_linebreaks(char* input) {
while (is_linebreak(input))
++input;
return input;
}
size_t count_lines(char* input) {
char* p = input;
size_t rows = 1;
if (is_end(p))
return 0;
while (!is_end(p)) {
if (is_linebreak(p)) {
++rows;
p = eat_linebreaks(p);
}
else {
++p;
}
}
return rows;
}
/* split string by lines */
char** get_lines(char* input, size_t line_count) {
char* p = input;
char* from = input;
size_t length = 0;
size_t line = 0;
int i;
char** lines = (char**)malloc(line_count * sizeof(char*));
do {
if (is_end(p) || is_linebreak(p)) {
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
length = 0;
++line;
p = eat_linebreaks(p);
from = p;
}
else {
++length;
++p;
}
} while (!is_end(p));
// Copy the last line as well in case the input doesn't end in line-break
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
++line;
return lines;
}
int main(int argc, char* argv[]) {
clock_t start;
unsigned long microseconds;
float seconds;
char** lines;
size_t size;
size_t number_of_rows;
int count;
int* my_array;
start = clock();
FILE *stream;
char *contents;
int fileSize = 0;
int i;
// Open file, find the size of it
stream = fopen(argv[1], "rb");
fseek(stream, 0L, SEEK_END);
fileSize = ftell(stream);
fseek(stream, 0L, SEEK_SET);
// Allocate space for the entire file content
contents = (char*)malloc(fileSize + 1);
// Stream file into memory
size = fread(contents, 1, fileSize, stream);
contents[size] = 0;
fclose(stream);
// Count rows in content
number_of_rows = count_lines(contents);
// Get array of char*, one for each line
lines = get_lines(contents, number_of_rows);
// Get the numbers out of the lines
count = atoi(lines[0]); // First row has count
my_array = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
my_array[i] = atoi(lines[i + 1]);
}
microseconds = clock() - start;
seconds = microseconds / 1000000.0f;
printf("Took %fs", seconds);
return 0;
}
First of all, you will want to use fgets instead to avoid dangerous buffer overflows. Second, you want to remove all punctuation from your numbers. Thus 2.000.000 becomes 2000000. Then you can use pointers and the strtol function to convert characters to integers; there are also other functions to convert to floats and other types.
Since code wants speed and IO is a typically bottle-neck, reading the entire file at once after using fstat() to find its length (#Charlon) makes some sense. Following is a quick parsing of that buffer.
// Stream file into memory
size = fread(contents, 1, fileSize, stream);
contents[size] = 0;
fclose(stream);
#if 1
// new code
size_t array_n;
int n;
if (sscanf(contents, "%zu%n", &array_n, &n) != 1) Handle_BadInput();
my_array = malloc(array_n * sizeof *my_array);
if (my_array == NULL) Handle_OOM();
char *p = &contents[n];
errno = 0;
char *endptr;
for (size_t count = 0; count < array_n; count++) {
my_array[count] = strtol(p, &endptr, 10);
if (p == endptr || errno)
Handle_BadInput();
p = endptr;
}
char ch;
if (sscanf(p, " %c", &ch) == 1) Handle_ExtraInput();
#else
//old code
// Count rows in content
number_of_rows = count_lines(contents);
// Get array of char*, one for each line
lines = get_lines(contents, number_of_rows);
// Get the numbers out of the lines
count = atoi(lines[0]); // First row has count
my_array = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
my_array[i] = atoi(lines[i + 1]);
}
#endif
Still prefer the scale-able approach of reading one number at a time.
The fastest way needs a lot of RAM :
1) open the file (man open)
2) use the fstat function to get the size of you file (man fstat)
3) read the file with a buffer malloc-ed with the size you just get at 2) (man malloc)
4) close the file (man close)
5) parse your buffer and transform each block of digits (each time until ' ' or '\0') to int
EDIT : if your RAM is not enough large, you need to create a get_next_int function that only stores in your buffer the next number in the file
EDIT 2 : You can read until you know the number of int you will need to store and compares this number with a security coef to the size of your ram, and use the good way so that your program won't set errno to ENOMEM if you know what I'm talking about ;)
I wrote a code in C that read a text file with numbers into memory and the create an 2d int array to store them.
The file has the following format:
9
9 5 6 2235 45558 6 5544 56565 2
The first number is the size of the array and the second line holds as many numbers as the first line says.
MY problem is that the size of the array can't hold more than ~30.000 numbers. How can I make the following code so I can make the array hold until 1.000.000 numbers? I know that I should use some king of long integer but I couldn't do it.
Heres the code
#include <stdio.h>
#include <stdlib.h>
int is_end(char* input) {
return *input == 0;
}
int is_separator(char* input) {
return *input == '\n' || *input == ' ';
}
char* eat_separators(char* input) {
while (is_separator(input))
++input;
return input;
}
size_t count_lines(char* input) {
size_t rows = 1;
while (!is_end(input)) {
if (is_separator(input)) {
++rows;
input = eat_separators(input);
}
else {
++input;
}
}
return rows;
}
char** get_lines(char* input, size_t number_of_rows) {
char* from = input;
size_t length = 0;
size_t line = 0;
size_t i;
char** lines = (char**)malloc(number_of_rows * sizeof(char*));
do {
if (is_end(input) || is_separator(input)) {
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
length = 0;
++line;
input = eat_separators(input);
from = input;
}
else {
++length;
++input;
}
} while (!is_end(input));
/*
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
++line; */
return lines;
}
int main(int argc, char* argv[]) {
char** lines;
size_t size;
size_t number_of_rows;
int count;
int* children;
FILE *input, *output;
char *contents;
int fileSize = 0;
int i;
input = fopen("xxx.in", "r");
long int filepos = 0L;
fseek(input, 0L, SEEK_END);
fileSize = ftell(input);
fseek(input, 0L, SEEK_SET);
contents = (char*)malloc(fileSize + 1);
size = fread(contents, 1, fileSize, input);
contents[size] = 0;
fclose(input);
number_of_rows = count_lines(contents);
lines = get_lines(contents, number_of_rows);
if ((count = atoi(lines[0])) <= 0 || count > 1000000){
return 1;
}
children = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
if ((children[i] = atoi(lines[i + 1])) <= 0 )
return(-1);
}
// a check to see if everything stored in the array
for(i = 0;i<count;i++)
{
printf(" %d : %d\n", i, children[i]);
}
free(children);
free(lines);
// This is the end! Oh my dear friend, the end!
return 0;
}
First Let me explaint the reason of having only 30.000 number that will give reply to your question?
Basically you are trying to convert the character to ASCII values. Let us take the example of character x whos ASCII value is 120. You are changing the character x with 120, the storage capacity of x is 1 byte but the storage capacity of 120 is 3 bytes. So, basically you have to do memory allocation of 3 times higher the actual value computed as 1 byte is expanding into 3 bytes.
In Your code increase the memory allocation 3 times then your problem would be solved.
I have a problem with getting every other line empty on output with this code. The desired output is: http://paste.ubuntu.com/1354365/
While I get: http://paste.ubuntu.com/1356669/
Does anyone have an idea of why I'm getting these empty lines on every other line?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE *fp;
FILE *fw;
int main(int argc, char *argv[]){
char buffer[100];
char *fileName = malloc(10*sizeof(char));
char **output = calloc(10, sizeof(char*));
char **outputBuffer = calloc(10, sizeof(char*));
fw = fopen("calvin.txt", "w+");
for(int y = 0; y < 6; y++){
for(int i = 0; i < 10; i ++)
{
output[i] = malloc(100);
}
for(int x = 0; x < 12; x++){
sprintf(fileName,"part_%02d-%02d", x, y);
fp = fopen(fileName, "rb");
if(fp == NULL)
{
printf("Kan ikke åpne den filen(finnes ikke/rettigheter)\n");
}
else if(fp != NULL){
memset(buffer, 0, 100);
for(int i = 0; i < 10; i++){
outputBuffer[i] = malloc(100);
}
fread(buffer, 1, 100, fp);
for(int i = 0; i < 100; i++){
if(buffer[i] == '\0')
{
buffer[i] = ' ';
}
else if(buffer[i] == '\n')
{
buffer[i] = ' ';
}
}
for(int i = 0; i < 10; i++) {
strncpy(outputBuffer[i], buffer + i * 10, 10);
strncat(output[i], outputBuffer[i]+1, 11);
}
}
}
for(int i = 0; i < 10; i++){
printf("%s\n", output[i]);
}
}
fclose(fp);
free(fileName);
}
You are not reading correcting from the file. On the first image in the beginning you have:
o ""oo " o o o
on the second
""oo o o o
That does not make a lot of sense because it is the first line. It is not related to empty lines since we are talking about the first line.
It seems that you are reading -2 characters from the left so " prints over o the other " on the ' ' ect..
Try this away, may not be the most efficient solution:
int read(char *file)
{
FILE *fp = NULL;
int size = 0, pos = 0,i;
fp = fopen(file,"r");
if (!fp) return 0;
for(; ((getc(fp))!=EOF); size++); // Count the number of elements in the file
fclose(fp);
char buffer[size];
fp = fopen(file,"r");
if (!fp) return 0;
while((buffer[pos++]=getc(fp))!=EOF); // Saving the chars into the buffer
for(i = 0; i < pos; i++) // print them.
printf("%c",buffer[i]);
fclose(fp);
return 1;
}
This part seems problematic:
strncpy(outputBuffer[i], buffer + i * 10, 10);
strncat(output[i], outputBuffer[i]+1, 11);
1) Why is it necessary to use the extra outputBuffer step?
2) You know that strncpy() isn't guaranteed to null-terminate the string it copies.
3) More significantly, output[i] hasn't been initialized, so strncat() will concatenate the string after whatever junk is already in there. If you use calloc() instead of malloc() when creating each output[i], that might help. It's even possible that your output[i] variables are what hold your extra newline.
4) Even if initialized to an empty string, you could easily overflow output[i], since you're looping 12 times and writing up to 11 characters to it. 11 * 12 + 1 for the null terminator = 133 bytes written to a 100-byte array.
In general, unless this is a class assignment that requires use of malloc(), I don't understand why you aren't just declaring your variables once, at the start of the program and zeroing them out at the start of each loop:
char fileName[10];
char output[10][100];
char outputBuffer[10][100];
And, as stated by others, your allocating a bunch of memory and not trying to free it up. Allocate it once outside of your loop or just skip the allocation step and declare them directly.