I wrote a function which should check if a word is included in a file, but my function returns always NOT_EXISTENT, why? I checked ptr and its always empty but the memory is located.
Here my function:
int search_for_word(char wort[]) {
char *ptr;
FILE *file;
unsigned long size_of_file = 0;
file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
fseek(file, 0L, SEEK_END);
size_of_file = ftell(file);
ptr = malloc(sizeof(char) * size_of_file + 1);
printf("Size:%li\n", size_of_file);
if (ptr == NULL) {
return ERROR;
}
fread(ptr, sizeof(char), size_of_file, file);
if (strstr(ptr, wort) == NULL) {
return NOT_EXISTENT;
}
fclose(file);
return EXISTENT;
}
At least these problems:
(Biggest issue) Missing rewind #alinsoar
fread() is attempting a read from the end of the file. Move back to the beginning.
rewind(file); // Add
size_t length = fread(ptr, sizeof(char), size_of_file, file);
Not a string #pm100
ptr is not certainly a string as it may lack a null character. strstr() expects 2 strings.
strstr(ptr, wort) // bad
Instead, append a null character to the data read before strstr().
size_t length = fread(ptr, sizeof(char), size_of_file, file);
ptr[length] = '\0'; // Add
Failure to close
Code selectively performs fclose(file). Call fclose() with each successful fopen().
Missing free() #Weather Vane
Free allocated memory when done.
wort[] may be ill formed
Posted code does not show the origin of wort[]. So recommendations are guesses at best.
No check on fseek() success
// fseek(file, 0L, SEEK_END)
if (fseek(file, 0 /* L not needed */, SEEK_END) == -1) {
Handle_error();
}
Better with a const #chqrlie
This allows passing constant strings.
// int search_for_word(char wort[]){
int search_for_word(const char wort[]) {
Minor
Size sizeof(char) * size_of_file + 1 may exceed SIZE_MAX.
sizeof(char) * size_of_file + 1 conceptually wrong. Better as sizeof(char) * (size_of_file + 1) or just size_of_file + 1u.
Some rough alternative code - unchecked.
// Let calling code open the file
// Return 1 on success.
// Return 0 on no-find.
// Return -1 on other failures.
int search_for_word(const char *word, FILE *inf) {
if (inf == 0) {
return -1;
}
size_t length_word = strlen(word);
if (length_word >= SIZE_MAX / 2) {
return -1; // TBD code to handle this extreme case
}
size_t buf_size = 4096; // Adjust as desired
if (buf_size <= length_word * 2) {
buf_size = length_word * 2 + 1;
}
char *buf = malloc(buf_size);
if (buf == NULL) {
return -1;
}
char *in = buf;
size_t in_length = 0;
for (;;) {
size_t length_read = fread(in, 1, buf_size, inf);
in[length_read] = '\0';
if (strstr(buf, word)) {
free(buf);
return 1;
}
if (length_read < buf_size) { // no more data expected
free(buf);
return 0;
}
// Copy last portion of buffer to the beginning.
in_length += length_read;
memmove(buf, &buf[in_length - length_word], length_word);
in_length = length_word;
in = buf + in_length;
}
}
Here is a modified version implementing suggestions from chux's answer and with an alternative method for huge files (which should probably be used for all files):
int search_for_word(const char *wort) {
int res = NOT_EXISTENT;
FILE *file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
#if 0 // set to 1 if you want to load the whole file in memory
if (fseek(file, 0L, SEEK_END) == -1) {
fclose(file);
return ERROR;
}
long size_of_file = ftell(file);
if (size_of_file < 0) {
fclose(file);
return ERROR;
}
rewind(file);
if ((unsigned long)size_of_file + 1 <= SIZE_MAX) {
char *ptr = malloc((size_t)size_of_file + 1);
if (ptr != NULL) {
size_t length = fread(ptr, 1, size_of_file, file);
ptr[length] = '\0';
res = strstr(ptr, wort) ? EXISTENT : NOT_EXISTENT;
free(ptr);
fclose(file);
return res;
}
}
#endif
/* use a different method: read 4KB at a time */
size_t len = strlen(wort);
char buf[4096 + len + 1];
size_t nread, pos = 0;
while ((nread = fread(buf + pos, 1, 4096, file)) > 0) {
buf[pos + nread] = '\0';
if (strstr(buf, wort)) {
res = EXISTENT;
break;
}
if (pos + nread <= len) {
pos += nread;
} else {
memmove(buf, buf + pos + nread - len, len);
pos = len;
}
}
fclose(file);
return res;
}
"Warning C6386 Buffer overrun while writing to 'buffer': the writable size is 'Param(1)*Param(2)' bytes, but '4294967295' bytes might be written."
I'm writing a code to calculate a postfix expression using a stack implemented using linked lists, and I'm reading the postfix expression from a local file in binary ( fopen(filename, "rb" ) into a buffer.
I get the above mentioned warning at this line of code:
fread(buffer, sizeof(char), fileLength, file);
But, I've used calloc to allocate exactly the amount of memory I'd need based on the length of the file like this:
fseek(file, 0, SEEK_END);
fileLength = ftell(file);
buffer = (char*)calloc(fileLength + 1, sizeof(char));
if (!buffer) {
perror("Can't allocate memory!\n");
return NULL;
}
I don't understand where it got the "'4294967295' bytes might be written". Anyone care enough to explain what might be the cause, I'm a student and I'm not that much experienced with C.
Here's the entire function block:
int CalculatePostfix(double* destination, char* fileName)
{
FILE* file = NULL;
int fileLength = 0;
char* buffer = NULL;
char* currentBuffer = NULL;
int numBytes = 0;
char operation = 0;
double number = 0;
int status = EXIT_SUCCESS;;
StackElement head = { .number = 0, .next = NULL };
file = fopen(fileName, "rb");
if (!file) {
perror("Can't open file!\n");
return -1;
}
fseek(file, 0, SEEK_END);
fileLength = ftell(file);
buffer = (char*)calloc(fileLength + 1, sizeof(char));
if (!buffer) {
perror("Can't allocate memory!\n");
return NULL;
}
rewind(file);
fread(buffer, sizeof(char), fileLength, file);
printf("|%s|\n", buffer);
fclose(file);
currentBuffer = buffer;
while (strlen(currentBuffer) > 0) {
status = sscanf(currentBuffer, " %lf %n", &number, &numBytes);
if (status == 1) {
Push(&head, number);
currentBuffer += numBytes;
}
else {
sscanf(currentBuffer, " %c %n", &operation, &numBytes);
status = PerformOperation(&head, operation);
if (status != EXIT_SUCCESS) {
free(buffer);
while (head.next != NULL) {
DeleteAfter(&head);
}
return -1;
}
currentBuffer += numBytes;
}
}
free(buffer);
return EXIT_SUCCESS;
}
ftell returns a long integer, fread takes a size_t which, depending on implementation, often is an unsigned int. So if you happen to get -1L (which is the error return code from ftell) back from ftell you will end up vid a massive large unsigned int.
So to solve this, check return value of ftell and make sure it is not -1L, then when calling fread cast to size_t
I am trying to copy binary files from src to dst. This script seems to copy all of the bytes. BUT when I open both files in Hex Workshop I see that dst file is always missing 3 bytes at the end of the file. These 3 bytes should have been 00 00 00, this problem prevents me from opening dst file.
void binaryCopy(char **argv) {
int *buf = 0;
int elements = 0;
int size = 0, wantOverwrite = 0;
FILE *src = fopen(argv[SRC_POS], "rb");
FILE *dst = fopen(argv[DST_POS], "w+b");
if (src) {
if (dst) {
wantOverwrite = overwrite();
}
if (wantOverwrite) {
fseek(src, 0L, SEEK_END);
size = ftell(src);
fseek(src, 0L, SEEK_SET);
buf = (int *)malloc(size);
elements = fread(buf, BYTE_SIZE, size / BYTE_SIZE, src);
fwrite(buf, BYTE_SIZE, elements, dst);
printf("copy completed");
free(buf);
}
}
fclose(dst);
fclose(src);
}
There are several problems in your function as written.
fopen(dstFilename, "w+b"); truncates the file, so your overwrite check later is meaningless.
You're not checking for NULL after malloc, and your buffer should be an unsigned char* since that is what fread/fwrite will interpret it as.
At the end, both fclose functions could be called with NULL file pointers likely resulting in a crash. You should move them into the scopes where you know each was successfully opened.
The big problem, the one that prompted this question, is that you are not handling cases where the size of the file is not an even multiple of whatever BYTE_SIZE is. Since you allocated enough memory for the whole file you should just read and write the whole file. fread(buf, 1, size, src); and fwrite(buf, 1, size, dst);. In general it is best to make the element size parameter of fread/fwrite 1 and the count the number of bytes you want to read or write. There's no math to go wrong, and you can tell exactly how many bytes were read/written.
Here's a version of your original function that I've corrected and annotated so it works if nothing goes wrong.
void originalBinaryCopy(const char *srcFilename, const char *dstFilename)
{
//odd size to ensure remainder
const size_t BYTE_SIZE = 777;
int *buf = 0;
int elements = 0;
int size = 0, wantOverwrite = 0;
FILE *src = fopen(srcFilename, "rb");
//This truncates dst, so the overwirte check is meaningless
FILE *dst = fopen(dstFilename, "w+b");
if (src)
{
if (dst)
{
fseek(src, 0L, SEEK_END);
size = ftell(src);
fseek(src, 0L, SEEK_SET);
//always check for NULL after malloc - This should be a char*
buf = (int *)malloc(size);
if (!buf)
{
fclose(dst);
fclose(src);
return;
}
elements = fread(buf, BYTE_SIZE, size / BYTE_SIZE, src);
fwrite(buf, BYTE_SIZE, elements, dst);
//added, copy remainder
elements = fread(buf, 1, size % BYTE_SIZE, src);
fwrite(buf, 1, size % BYTE_SIZE, dst);
//end
printf("copy completed %s -> %s\n", srcFilename, dstFilename);
free(buf);
}
}
//dst could be NULL here, move inside if(dst) scope above
fclose(dst);
//src could be NULL here, move inside if(src) scope above
fclose(src);
if (comp(srcFilename, dstFilename) != 0)
{
printf("compare failed - %s -> %s\n", srcFilename, dstFilename);
}
}
Notice how the remainder is handled at the end.
Here is how I would handle copying files along with a test suite to create, copy, and verify a set of files. It shows how to avoid truncating the destination if you don't want to and has quite a bit of error checking in the actual functions. I did not include any specific error checking on the caller side, but for real code I would have enumerated all of the possible errors and used those return values to pass to an error handling function that could print them out and possibly exit the program.
Manipulating files is one thing you want to be VERY careful about since there's potential for data loss if your code doesn't work, so before you use it with real files make sure it's 100% solid with test files.
#include <malloc.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define TEST_FILE_MIN 1024
#define TEST_FILE_MAX 1024 * 1024
const char *src_pattern = "src_file_%08x.bin";
const char *dst_pattern = "dst_file_%08x.bin";
void createTestFiles(const char *pattern)
{
char filename[256] = { 0 };
char buffer[1024];
for (size_t i = 0; i < sizeof(buffer); ++i)
{
buffer[i] = rand();
}
for (size_t i = TEST_FILE_MIN; i <= TEST_FILE_MAX; i *= 2)
{
sprintf(filename, pattern, i);
FILE *dst = fopen(filename, "wb");
if (dst)
{
size_t reps = i / TEST_FILE_MIN;
for (size_t w = 0; w < reps; ++w)
{
fwrite(buffer, 1, sizeof(buffer), dst);
}
fclose(dst);
}
}
}
int comp(const char *srcFilename, const char *dstFilename)
{
FILE *src = fopen(srcFilename, "rb");
if (!src)
{
return -1;
}
//open for reading to check for existence
FILE *dst = fopen(dstFilename, "rb");
if (!dst)
{
fclose(src);
return -2;
}
fseek(src, 0, SEEK_END);
size_t srcSize = ftell(src);
fseek(src, 0, SEEK_SET);
fseek(dst, 0, SEEK_END);
size_t dstSize = ftell(dst);
fseek(dst, 0, SEEK_SET);
if (srcSize == 0 || dstSize == 0 || srcSize != dstSize)
{
fclose(src);
fclose(dst);
return -3;
}
unsigned char *srcBuf = (unsigned char *)calloc(1, srcSize);
unsigned char *dstBuf = (unsigned char *)calloc(1, srcSize);
if (!srcBuf || !dstBuf)
{
fclose(src);
fclose(dst);
return -4;
}
if (fread(srcBuf, 1, srcSize, src) != srcSize)
{
fclose(src);
fclose(dst);
return -5;
}
if (fread(dstBuf, 1, dstSize, dst) != dstSize)
{
fclose(src);
fclose(dst);
return -6;
}
fclose(src);
fclose(dst);
//result * 100 to make this error outside te range of the other general errors from this function.
int result = memcmp(srcBuf, dstBuf, srcSize) * 100;
free(srcBuf);
free(dstBuf);
return result;
}
void originalBinaryCopy(const char *srcFilename, const char *dstFilename)
{
//odd size to ensure remainder
const size_t BYTE_SIZE = 777;
int *buf = 0;
int elements = 0;
int size = 0, wantOverwrite = 0;
FILE *src = fopen(srcFilename, "rb");
//This truncates dst, so the overwirte check is meaningless
FILE *dst = fopen(dstFilename, "w+b");
if (src)
{
if (dst)
{
fseek(src, 0L, SEEK_END);
size = ftell(src);
fseek(src, 0L, SEEK_SET);
//always check for NULL after malloc - This should be a char*
buf = (int *)malloc(size);
if (!buf)
{
fclose(dst);
fclose(src);
return;
}
elements = fread(buf, BYTE_SIZE, size / BYTE_SIZE, src);
fwrite(buf, BYTE_SIZE, elements, dst);
//added, copy remainder
elements = fread(buf, 1, size % BYTE_SIZE, src);
fwrite(buf, 1, size % BYTE_SIZE, dst);
//end
printf("copy completed %s -> %s\n", srcFilename, dstFilename);
free(buf);
}
}
//dst could be NULL here, move inside if(dst) scope above
fclose(dst);
//src could be NULL here, move inside if(src) scope above
fclose(src);
if (comp(srcFilename, dstFilename) != 0)
{
printf("compare failed - %s -> %s\n", srcFilename, dstFilename);
}
}
int binaryCopy(const char *srcFilename, const char *dstFilename, bool overwrite)
{
//arbitrary odd size so we can make sure we handle a partial buffer.
//assuming the code tests successfully I'd use something like 64 * 1024.
unsigned char buffer[7777] = { 0 };
FILE *src = fopen(srcFilename, "rb");
if (!src)
{
//Error, source file could not be opened
return -1;
}
//open for reading to check for existence
FILE *dst = fopen(dstFilename, "rb");
if (dst)
{
if (!overwrite)
{
//Error, dest file exists and we can't overwrite it
fclose(src);
fclose(dst);
return -2;
}
//reopen dst it for writing
if (!freopen(dstFilename, "wb", dst))
{
fclose(src);
fclose(dst);
dst = NULL;
}
}
else
{
//it didn't exist, create it.
dst = fopen(dstFilename, "wb");
}
if (!dst)
{
//Error, dest file couldn't be opened
fclose(src);
return -3;
}
//Get the size of the source file for comparison with what we read and write.
fseek(src, 0, SEEK_END);
size_t srcSize = ftell(src);
fseek(src, 0, SEEK_SET);
size_t totalRead = 0;
size_t totalWritten = 0;
size_t bytesRead = 0;
while (bytesRead = fread(buffer, 1, sizeof(buffer), src))
{
totalRead += bytesRead;
totalWritten += fwrite(buffer, 1, bytesRead, dst);
}
fclose(dst);
fclose(src);
if (totalRead != srcSize)
{
//src read error
return -4;
}
if (totalWritten != srcSize)
{
//dst write error
return -5;
}
return 0;
}
int main()
{
srand((unsigned)time(0));
createTestFiles(src_pattern);
for (size_t i = TEST_FILE_MIN; i <= TEST_FILE_MAX; i *= 2)
{
char srcName[256];
char dstName[256];
sprintf(srcName, src_pattern, i);
sprintf(dstName, dst_pattern, i);
//use my copy to create dest file
if (binaryCopy(srcName, dstName, true) != 0)
{
printf("File: '%s' failed initial copy.", srcName);
}
originalBinaryCopy(srcName, dstName);
if (binaryCopy(srcName, dstName, true) != 0)
{
printf("File: '%s' failed overwrite copy.", srcName);
}
if (binaryCopy(srcName, dstName, false) == 0)
{
printf("File: '%s' succeeded when file exists and overwrite was not set.", srcName);
}
//If compare succeeds delete the files, otherwise leave them for external comparison and print an error.
if (comp(srcName, dstName) == 0)
{
if (remove(srcName) != 0)
{
perror("Could not remove src.");
}
if (remove(dstName) != 0)
{
perror("Could not remove dst.");
}
}
else
{
printf("File: '%s' did not compare equal to '%s'.", srcName, dstName);
}
}
return 0;
}
Hopefully this gives you something to experiment with to make sure your copier is as good as it can be. Also worth noting, I would not distinguish copying text/binary files. Files are files and if your goal is to copy them then you should always do it in binary mode so the copy is identical. On operating systems other than Windows it wouldn't matter, but on Windows there are a number of pitfalls you can run into in text mode. Best to avoid those completely if you can.
Good luck!
The most probable cause for your observation is the file size is not a multiple of BYTE_SIZE: fread(buf, BYTE_SIZE, size / BYTE_SIZE , src); reads a multiple of BYTE_SIZE and the fwrite call writes the bytes read.
If BYTE_SIZE is 4, as the type int* buf = 0; seems to indicate, and if the source file has 3 more bytes than a multiple of 4, your observations would be fully explained.
You can correct the problem by making buf an unsigned char * and changing the code to:
elements = fread(buf, 1, size , src);
fwrite(buf, 1, elements, dst);
Note also that there is no need to open the files in update mode (the + in the mode string), errors and not handled explicitly and the fclose() calls are misplaced.
Also it seems incorrect to truncate the destination file if overwrite() returns 0.
Here is a corrected version with better error handling:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
int binaryCopy(char *argv[]) {
FILE *src, *dst;
long file_size;
size_t size, size_read, size_written;
int wantOverwrite;
unsigned char *buf;
if ((src = fopen(argv[SRC_POS], "rb")) == NULL) {
printf("cannot open input file %s: %s\n", argv[SRC_POS], strerror(errno));
return -1;
}
wantOverwrite = overwrite();
if (!wantOverwrite) {
fclose(src);
return 0;
}
if ((dst = fopen(argv[DST_POS], "wb")) == NULL) {
printf("cannot open output file %s: %s\n", argv[DST_POS], strerror(errno));
fclose(src);
return -1;
}
fseek(src, 0L, SEEK_END);
file_size = ftell(src);
fseek(src, 0L, SEEK_SET);
size = (size_t)file_size;
if ((long)size != file_size) {
printf("file size too large for a single block: %ld\n", file_size);
fclose(src);
fclose(dst);
return -1;
}
buf = malloc(size);
if (buf == NULL) {
printf("cannot allocate block of %zu bytes\n", size);
fclose(src);
fclose(dst);
return -1;
}
size_read = fread(buf, 1, size, src);
if (size_read != size) {
printf("read error: %zu bytes read out of %zu\n", size_read, size);
}
size_written = fwrite(buf, 1, size_read, dst);
if (size_written != size_read) {
printf("write error: %zu bytes written out of %zu\n", size_written, size_read);
}
if (size_written == size) {
printf("copy completed\n");
}
free(buf);
fclose(dst);
fclose(src);
return 0;
}
I have the following function:
void read_file(char* path, char** data)
{
FILE* file = NULL;
size_t size, result = 0;
*data = NULL;
file = fopen(path, "rb");
if (file == NULL)// error opening file
{
return;
}
fseek(file, 0, SEEK_END);
size = ftell(file) + 1;
rewind(file);
*data = (char*)malloc(size);
if(*data == NULL)
return;
result = fread(*data, 1, size, file);
if (result != size - 1)// error reding file
{
*data = NULL;
}
printf("LINE=%u\n", __LINE__);
(*data)[size-1] = '\0';
printf("LINE=%u\n", __LINE__);
fclose(file);
return;
}
I am getting a Segmentation fault on the line right in between the two printf("LINE=%u\n", __LINE__); statements. I don't understand why this is. When I'm looking at this line, it seems (*data) would have a type of (char *) which should certainly be able to be used with the index operator [].
What am I missing?
Probably the if (result != size - 1) test is failing and then you reset *data to NULL (which is a memory leak, BTW), and then you try to write to (*data)[size-1] - oops !
some pointers:
ftell returns -1 on failure, so if that is the case this will be 0 size = ftell(file) + 1;
size_t on some platforms is unsigned int, it may be good to have that in mind.
doing *data = NULL; is not a good idea, free it first free( *data );
put some if statements in your code to catch errors, don't assume everything will work
e.g. assert( size>0 );
I have tested your code and it works for me - I have added returning of file's size to properly pass the data to fwrite.
> ./a.out arm-2010.09-good.tar.bz2 | sha1sum && sha1sum arm-2010.09-good.tar.bz2
alloc size of 37265592
6bdff517bcdd1d279fc84ab3a5fbbca34211a87c -
6bdff517bcdd1d279fc84ab3a5fbbca34211a87c arm-2010.09-good.tar.bz2
furthermore Valgrind reports no warning and errors so .. loooks OK!
#include <stdio.h>
#include <stdlib.h>
size_t read_file(char* path, char** data)
{
FILE* file = NULL;
size_t size, result = 0;
*data = NULL;
file = fopen(path, "rb");
if (file == NULL)// error opening file
{
return 0;
}
fseek(file, 0, SEEK_END);
size = ftell(file) + 1;
rewind(file);
fprintf(stderr, "alloc size of %i\n", size);
*data = (char*)malloc(size);
if(*data == NULL)
return 0;
result = fread(*data, 1, size, file);
if (result != size - 1)// error reding file
*data = NULL;
(*data)[size-1] = '\0';
size--; // report file size
fclose(file);
return size;
}
int main(int argc, char** argv)
{
char* data;
if(argc<2)
return 0;
size_t siz = read_file(argv[1], &data);
if(data) {
fwrite(data, 1, siz, stdout);
free(data);
}
else {
fprintf(stderr, "No data returned\n");
}
return 0;
}
Here's the probable source of the problem:
if (result != size - 1)// error reding file
{
*data = NULL;
}
printf("LINE=%u\n", __LINE__);
(*data)[size-1] = '\0';
What happens if there is an error reading the file? You set *data to NULL, and then immediately try to dereference it - bad juju.
Note that this also results in a memory leak; you don't free the memory that *data points to.
Restructure your code so that (*data)[size-1] = '\0' is executed only if the read operation was successful:
if (result != size - 1)
{
free(*data);
*data = NULL;
}
else
{
(*data)[size-1] = 0;
}