fread and fwrite result file size are different - c

I am writing a program in Visual Studio.
I copied a file using fread and fwrite.
The output file size is bigger then input file.
Can you explain the reason?
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
int main()
{
char *buffer;
int fsize;
FILE *fp = fopen("test.txt", "r");
FILE *ofp = fopen("out.txt", "w");
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
buffer = (char *)malloc(fsize);
memset(buffer, 0, fsize); // buffer를 0으로 초기화
fseek(fp, 0, SEEK_SET);
fread(buffer, fsize, 1, fp);
fwrite(buffer, fsize, 1, ofp);
fclose(fp);
fclose(ofp);
free(buffer);
}

You open the files in text mode, which on the Windows operating system using Visual Studio involves non trivial translation phases, including end of line conversion. If your files have binary contents, such as executable, image and document files, end of line conversion replaces '\n' bytes with CR LF pairs, thereby increasing the output size.
You can avoid this issue by opening the files in binary mode with "rb" and "wb" mode strings.
Also note that a stream must be open in binary mode for ftell() to reliably return the file size, assuming the file supports seeking and is not larger than LONG_MAX which is only 2GB on Windows. Using stat to retrieve the file size from the OS is a better approach for POSIX systems. Copying the file one block at a time is also more reliable: it works for streams that do not support seeking and allows for copying files larger than available memory.
Here is a modified version with error checking:
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
int main() {
const char *inputfile = "test.txt";
const char *outputfile = "out.txt";
FILE *fp = fopen(inputfile, "rb");
if (fp == NULL) {
fprintf(stderr, "cannot open %s: %s\n", inputfile, strerror(errno);
return 1;
}
FILE *ofp = fopen(outputfile, "wb");
if (ofp == NULL) {
fprintf(stderr, "cannot open %s: %s\n", outputfile, strerror(errno);
return 1;
}
if (fseek(fp, 0, SEEK_END)) {
fprintf(stderr, "%s: cannot seek to the end of file: %s\n",
inputfile, strerror(errno);
return 1;
}
size_t fsize = ftell(fp);
char *buffer = calloc(fsize, 1);
if (buffer == NULL) {
fprintf(stderr, "cannot allocate %zu bytes: %s\n",
fsize, strerror(errno);
return 1;
}
rewind(fp);
size_t nread = fread(buffer, fsize, 1, fp);
if (nread != fsize) {
fprintf(stderr, "%s: read %zu bytes, file size is %zu bytes\n".
inputfile, nread, fsize);
}
size_t nwritten = fwrite(buffer, nread, 1, ofp);
if (nwritten != nread) {
fprintf(stderr, "%s: wrote %zu bytes, write size is %zu bytes\n".
outputfile, nwritten, nread);
}
fclose(fp);
if (fclose(ofp)) {
fprintf(stderr, "%s: error closing file: %s\n".
outputfile, strerror(errno));
}
free(buffer);
return 0;
}

Related

Trying to read and write image as byte array in C

The following code is supposed to load and save an image file (and not only) into a copy file:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
FILE* file = fopen("pexels.jpg", "r");
if (!file) {
perror("File opening failed");
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_END);
long file_size = ftell(file);
fseek(file, 0, SEEK_SET);
void* data = malloc(file_size);
memset(data, 0, file_size);
fread(data, 1, file_size, file);
fclose(file);
FILE *copy = fopen("copy.jpg", "w");
if (!copy) {
perror("File opening failed");
free(data);
return EXIT_FAILURE;
}
fwrite(data, 1, file_size, copy);
free(data);
fclose(copy);
}
the file gets loaded and saved as an image using only array of bytes but the result gets corrupted.
I wonder what could be wrong here.
On windows you need to call fopen() with the 'b' flag to read and write binary files. You don't need memset() (and if you did, prefer calloc() instead). You will probably see similar performance writing 4k or 8k at a time and eliminate the error case of running out of memory if your input file is huge. In either case I recommend you check the return value from fread() and fread() and wrap the read/write operation in a loop.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
FILE* file = fopen("pexels.jpg", "br");
if (!file) {
perror("File opening failed");
return EXIT_FAILURE;
}
fseek(file, 0, SEEK_END);
long file_size = ftell(file);
fseek(file, 0, SEEK_SET);
void* data = malloc(file_size);
fread(data, 1, file_size, file);
fclose(file);
FILE *copy = fopen("copy.jpg", "bw");
if (!copy) {
perror("File opening failed");
free(data);
return EXIT_FAILURE;
}
fwrite(data, 1, file_size, copy);
free(data);
fclose(copy);
}

Reading a pdf file with fread in C does not end up as expected

I am trying to read from a pdf file and write into another file where I run to the problem.
In the while loop, fread reads only 589 bytes which is expected to be 1024 for the first time.
In the second loop, fread reads 0 bytes.
I am sure that the pdf file is beyond 1024 bytes.
Here is a similar problem. The phenomenon is the same. But I do not use strlen() which causes that problem.
So how can I resolve the problem?
My code is here:
#include <stdio.h>
#define MAXLINE 1024
int main() {
FILE *fp;
int read_len;
char buf2[MAXLINE];
FILE *fp2;
fp2 = fopen("test.pdf", "w");
if ((fp = fopen("LearningSpark.pdf", "r")) == NULL) {
printf("Open file failed\n");
}
while ((read_len = fread(buf2, sizeof(char), MAXLINE, fp)) > 0) {
int write_length = fwrite(buf2, sizeof(char), read_len, fp2);
if (write_length < read_len) {
printf("File write failed\n");
break;
}
}
return 0;
}
fopen(filename, "r") is system dependent. See this post on what may happen to the data you read if you are on Windows, for example. Basically it is related to how certain characters are translated on different systems in text mode, ie., \n is "End-of-Line" on Unix-type systems, but on Windows it is \r\n.
Important: On Windows, ASCII char 27 will result in End-Of-File, if reading in text mode, "r", causing the fread() to terminate prematurely.
To read a binary file, use the "rb" specifier. Similarly for "w", as mentioned here, you should use "wb" to write binary data.
Binary files such as pdf files must be open in binary mode to prevent end of line translation and other text mode handling on legacy systems such as Windows.
Also note that you should abort when fopen() fails and you should close the files.
Here is a modified version:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#define MAXLINE 1024
int main() {
char buf2[MAXLINE];
int read_len;
FILE *fp;
FILE *fp2;
if ((fp = fopen("LearningSpark.pdf", "rb")) == NULL) {
fprintf(stderr, "Open file failed for %s: %s\n", "LearningSpark.pdf", strerror(errno));
return 1;
}
if ((fp2 = fopen("test.pdf", "wb")) == NULL) {
fprintf(stderr, "Open file failed for %s: %s\n", "test.pdf", strerror(errno));
fclose(fp);
return 1;
}
while ((read_len = fread(buf2, 1, MAXLINE, fp)) > 0) {
int write_length = fwrite(buf2, 1, read_len, fp2);
if (write_length < read_len) {
fprintf(stderr, "File write failed: %s\n", strerror(errno));
break;
}
}
fclose(fp);
fclose(fp2);
return 0;
}

Cannot Copy a Text File through fwrite and fread in C [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 5 years ago.
Improve this question
Here is the Code:
#include <stdio.h>
#include <stdlib.h>
int main() {
FILE *f, *fp;
char buff[512];
int buff_size;
int bytes;
fp = fopen("File.txt", "rb");
if (fp == NULL) {
printf("Cannot Open Source File!\n");
exit(1);
}
f = fopen("append.txt", "ab+");
if (f == NULL) {
printf("Cannot Open Target File!\n");
fclose(fp);
exit(1);
}
buff_size = sizeof(buff);
while (bytes = fread(&buff, buff_size, 1, fp) > 0) {
if (bytes > 0)
fwrite(&buff, buff_size, 1, f);
else
break;
printf("Appending...\n\n");
}
rewind(f);
while (bytes = fread(&buff, buff_size, 1, f) > 0)
if (bytes > 0)
printf("%s", buff);
fclose(fp);
fclose(f);
}
So, it happens to be that this doesn't output anything and when I check the file "append.txt" it also does not contain anything.
Note that the Source File "File.txt" is not empty.
Can anyone tell me what is wrong with it?
EDIT:
I fixed the problem by replacing buff_size with strlen(buff) as this:
bytes = fread(&buff, strlen(buff), 1, f) > 0 and the same in fwrite() and second fread().
Can someone explain why this worked?
char buff[512];
int buff_size;
// [...]
bytes = fread(&buff, buff_size, 1, fp)
This will attempt to read one block of 512 bytes. The return value is the number of blocks read, so it won't be bytes. But leaving this aside, if your file is shorter than 512 bytes, this won't read anything.
What you want is read 512 times 1 byte, then you will get the byte count back, so swap places for buff_size and 1.
Side notes:
if you do your checks in the loop condition correctly like:
while ((bytes = fread(buff, 1, buff_size, fp)) > 0 )
the extra check for if (bytes > 0) is redundant.
when writing, you only want to write the amount of bytes you actually read:
fwrite(buff, 1, bytes, f);
For sizes, always use size_t -- int could very well be wrong:
size_t buff_size;
size_t bytes;
printing your buff with printf("%s", ) is undefined behavior because you don't add a '\0' byte after the data read by fread(). A C string must end with '\0'. When the data read by fread() doesn't contain a '\0' by accident, printf() will read and use uninitialized data and possibly even read beyond the bounds of your buff.
Felix Palmen listed a number of problems in your code, your fix is completely wrong as buff does not even have a null terminator.
Here is a better version:
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE *f, *fp;
char buff[512];
size_t bytes;
fp = fopen("File.txt", "rb");
if (fp == NULL) {
fprintf(stderr, "Cannot open source file!\n");
exit(1);
}
f = fopen("append.txt", "ab+");
if (f == NULL) {
fprintf(stderr, "Cannot open target file!\n");
fclose(fp);
exit(1);
}
while ((bytes = fread(buff, 1, sizeof buff, fp)) != 0) {
if (fwrite(buff, 1, bytes, 1, f) != bytes) {
fprintf(stderr, "Error writing to the target file\n");
break;
}
printf("Appending...\n\n");
}
rewind(f);
while ((bytes = fread(buff, 1, sizeof buff, f)) != 0) {
printf("%.*s", (int)bytes, buff);
}
fclose(fp);
fclose(f);
return 0;
}
bytes = fread(&buff, buff_size, 1, fp) > 0
Take a look at the C Precedence Chart. The operator > is above =, so the return value of fread will be compared to zero, and then the result of that comparison will be stored in bytes. You intended to write this
(bytes = fread(&buff, buff_size, 1, fp)) > 0
Here's my solution to your problem, given fixed file names. Were it my own code, it would take the file names as arguments.
#include <stdio.h>
int main(void)
{
const char src_file[] = "File.txt";
FILE *fp = fopen(src_file, "rb");
if (fp == NULL)
{
fprintf(stderr, "Cannot Open Source File '%s'!\n", src_file);
return(1);
}
const char tgt_file[] = "append.txt";
FILE *f = fopen(tgt_file, "ab+");
if (f == NULL)
{
fprintf(stderr, "Cannot Open Target File '%s'!\n", tgt_file);
fclose(fp);
return(1);
}
char buff[512];
int bytes;
while ((bytes = fread(buff, sizeof(char), sizeof(buff), fp)) > 0)
{
fwrite(buff, sizeof(char), bytes, f);
printf("Appending...\n\n");
}
rewind(f);
while ((bytes = fread(buff, sizeof(char), sizeof(buff), f)) > 0)
printf("%.*s", bytes, buff);
fclose(fp);
fclose(f);
return 0;
}
There are a variety of fixes, most of them articulated in comments somewhere along the line.
I made the file names into arrays so that the name could be used in both fopen() and in the error messages, which are printed to stderr, not stdout. This is helpful to other users in generalized code. If the file names come from command line arguments, it is trivial.
The calls to fread() were fixed so that the number of bytes is reported, rather than the number of 512-byte blocks (which will be 0 or 1). This involved reversing the order of the size/count arguments to fread(). The buffer was passed rather than the address of the buffer, too.
The number of bytes read was captured correctly.
The number of bytes read was used to control the size of the fwrite().
The number of bytes read was used to control the number of bytes printed by printf().
I don't like the special rule in C99 and later that allows main() — but only main() — to return 0 by default. AFAIAC, it's a function defined as returning an int; it should return an int. However, there are others who disagree.

Copy textfile using fwrite, ending up with trash? C

Iam trying to copy a text file to a new file. I was thinking that if I want to do it smart, I just copy everything binary so the copy will be identical to the first. However I'am ending up with weird character in the new document.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char * argv[])
{
FILE * fporgi, * fpcopy;
if((fporgi = fopen(argv[1], "rb")) == NULL){
//Error checking
fprintf(stdout, "Error occurred trying to open file :%s", argv[1]);
exit(EXIT_FAILURE);
}
if((fpcopy = fopen(argv[2], "wb")) == NULL){
fprintf(stdout, "Error occurred trying to open file :%s", argv[2]);
exit(EXIT_FAILURE);
}
long bytes;
fseek(fporgi, 0L, SEEK_END);
bytes = ftell(fporgi);
fprintf(stdout, "\n%ld\n", bytes);
unsigned char buffer[bytes];
fprintf(stdout, "\n%u\n", sizeof(buffer));
fread(buffer, sizeof(buffer), 1, fporgi);
fwrite(buffer, sizeof(buffer), 1, fpcopy);
fclose(fporgi);
fclose(fpcopy);
return 0;
}
Example if the original file contains "hej svej" the new file will have : "(œÌuR0#NUL"
You need to seek back to the start of the file after reading the length:
fseek(fporgi, 0L, SEEK_END);
bytes = ftell(fporgi);
fprintf(stdout, "\n%ld\n", bytes);
fseek(fporgi, 0L, SEEK_SET);

How can I get a file's size in C? [duplicate]

This question already has answers here:
How do you determine the size of a file in C?
(15 answers)
Closed 3 years ago.
How can I find out the size of a file I opened with an application written in C ?
I would like to know the size, because I want to put the content of the loaded file into a string, which I allocate using malloc(). Just writing malloc(10000*sizeof(char)); is IMHO a bad idea.
You need to seek to the end of the file and then ask for the position:
fseek(fp, 0L, SEEK_END);
sz = ftell(fp);
You can then seek back, e.g.:
fseek(fp, 0L, SEEK_SET);
or (if seeking to go to the beginning)
rewind(fp);
Using standard library:
Assuming that your implementation meaningfully supports SEEK_END:
fseek(f, 0, SEEK_END); // seek to end of file
size = ftell(f); // get current file pointer
fseek(f, 0, SEEK_SET); // seek back to beginning of file
// proceed with allocating memory and reading the file
Linux/POSIX:
You can use stat (if you know the filename), or fstat (if you have the file descriptor).
Here is an example for stat:
#include <sys/stat.h>
struct stat st;
stat(filename, &st);
size = st.st_size;
Win32:
You can use GetFileSize or GetFileSizeEx.
If you have the file descriptor fstat() returns a stat structure which contain the file size.
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
// fd = fileno(f); //if you have a stream (e.g. from fopen), not a file descriptor.
struct stat buf;
fstat(fd, &buf);
off_t size = buf.st_size;
I ended up just making a short and sweet fsize function(note, no error checking)
int fsize(FILE *fp){
int prev=ftell(fp);
fseek(fp, 0L, SEEK_END);
int sz=ftell(fp);
fseek(fp,prev,SEEK_SET); //go back to where we were
return sz;
}
It's kind of silly that the standard C library doesn't have such a function, but I can see why it'd be difficult as not every "file" has a size(for instance /dev/null)
How to use lseek/fseek/stat/fstat to get filesize ?
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
void
fseek_filesize(const char *filename)
{
FILE *fp = NULL;
long off;
fp = fopen(filename, "r");
if (fp == NULL)
{
printf("failed to fopen %s\n", filename);
exit(EXIT_FAILURE);
}
if (fseek(fp, 0, SEEK_END) == -1)
{
printf("failed to fseek %s\n", filename);
exit(EXIT_FAILURE);
}
off = ftell(fp);
if (off == -1)
{
printf("failed to ftell %s\n", filename);
exit(EXIT_FAILURE);
}
printf("[*] fseek_filesize - file: %s, size: %ld\n", filename, off);
if (fclose(fp) != 0)
{
printf("failed to fclose %s\n", filename);
exit(EXIT_FAILURE);
}
}
void
fstat_filesize(const char *filename)
{
int fd;
struct stat statbuf;
fd = open(filename, O_RDONLY, S_IRUSR | S_IRGRP);
if (fd == -1)
{
printf("failed to open %s\n", filename);
exit(EXIT_FAILURE);
}
if (fstat(fd, &statbuf) == -1)
{
printf("failed to fstat %s\n", filename);
exit(EXIT_FAILURE);
}
printf("[*] fstat_filesize - file: %s, size: %lld\n", filename, statbuf.st_size);
if (close(fd) == -1)
{
printf("failed to fclose %s\n", filename);
exit(EXIT_FAILURE);
}
}
void
stat_filesize(const char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) == -1)
{
printf("failed to stat %s\n", filename);
exit(EXIT_FAILURE);
}
printf("[*] stat_filesize - file: %s, size: %lld\n", filename, statbuf.st_size);
}
void
seek_filesize(const char *filename)
{
int fd;
off_t off;
if (filename == NULL)
{
printf("invalid filename\n");
exit(EXIT_FAILURE);
}
fd = open(filename, O_RDONLY, S_IRUSR | S_IRGRP);
if (fd == -1)
{
printf("failed to open %s\n", filename);
exit(EXIT_FAILURE);
}
off = lseek(fd, 0, SEEK_END);
if (off == -1)
{
printf("failed to lseek %s\n", filename);
exit(EXIT_FAILURE);
}
printf("[*] seek_filesize - file: %s, size: %lld\n", filename, (long long) off);
if (close(fd) == -1)
{
printf("failed to close %s\n", filename);
exit(EXIT_FAILURE);
}
}
int
main(int argc, const char *argv[])
{
int i;
if (argc < 2)
{
printf("%s <file1> <file2>...\n", argv[0]);
exit(0);
}
for(i = 1; i < argc; i++)
{
seek_filesize(argv[i]);
stat_filesize(argv[i]);
fstat_filesize(argv[i]);
fseek_filesize(argv[i]);
}
return 0;
}
Have you considered not computing the file size and just growing the array if necessary? Here's an example (with error checking ommitted):
#define CHUNK 1024
/* Read the contents of a file into a buffer. Return the size of the file
* and set buf to point to a buffer allocated with malloc that contains
* the file contents.
*/
int read_file(FILE *fp, char **buf)
{
int n, np;
char *b, *b2;
n = CHUNK;
np = n;
b = malloc(sizeof(char)*n);
while ((r = fread(b, sizeof(char), CHUNK, fp)) > 0) {
n += r;
if (np - n < CHUNK) {
np *= 2; // buffer is too small, the next read could overflow!
b2 = malloc(np*sizeof(char));
memcpy(b2, b, n * sizeof(char));
free(b);
b = b2;
}
}
*buf = b;
return n;
}
This has the advantage of working even for streams in which it is impossible to get the file size (like stdin).
If you're on Linux, seriously consider just using the g_file_get_contents function from glib. It handles all the code for loading a file, allocating memory, and handling errors.
#include <stdio.h>
#define MAXNUMBER 1024
int main()
{
int i;
char a[MAXNUMBER];
FILE *fp = popen("du -b /bin/bash", "r");
while((a[i++] = getc(fp))!= 9)
;
a[i] ='\0';
printf(" a is %s\n", a);
pclose(fp);
return 0;
}
HTH

Resources