Problam with fread in C - c

I tried to read binary files into dinamic string and somthing go wrong.
I cant set free the string and i cant print or do anything else with it.
The files are OK if I just open it without all the dinamic stuff it runs well.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma warning (disable: 4996)
#define STR_LEN 50
int main(int args, char** argv)
{
char filePath[STR_LEN];
char signaturePath[STR_LEN];
FILE* file;
FILE* signature;
int fileSize;
int signatureSize;
strcpy(filePath, argv[2]);
strcpy(signaturePath, argv[1]);
file = fopen(filePath, "rb");
signature = fopen(signaturePath, "rb");
if (file == NULL)
printf("e: f\n");
if (signature == NULL)
printf("e: s\n");
fseek(file, 0L, SEEK_END);
fileSize = ftell(file);
fseek(file, 0L, SEEK_SET);
fseek(signature, 0L, SEEK_END);
signatureSize = ftell(signature);
fseek(signature, 0L, SEEK_SET);
char* fileStr = (char)malloc(sizeof(char) * fileSize + 1);
char* signatureStr = (char)malloc(sizeof(char) * signatureSize + 1);
fread(fileStr, fileSize, 1, file);
fread(signatureStr, signatureSize, 1, signature);
free(fileStr);
free(signatureStr);
fclose(file);
fclose(signature);
return 0;
}

You are casting the pointers that malloc() returns to char. In typical environment, char is 1-byte long while pointers are 4-byte or 8-byte long. The cast will truncate the pointers, turning them to some invalid value.
Casting results of malloc() family is considered as a bad practice. Remove the harmful casts to fix.

Related

Writing to file in C overwrites variables

Description:
I have created a small program that stores the name and checksum of a file in a struct, for each file in a directory. When output is written to stdout with printf, everything seems fine, but if we write to a file with either fputs or fprintf, values get overwritten, perhaps because of some buffer overflow?
Output from main with print.
Name: 2.txt. Checksum: fc769d448ed4e08bd855927bad2c8e43efdf5315a6daa9f28577758786d52eaf
Name: 1.txt. Checksum: 2d46cffd0302c5537ddb4952a9cca7d66060dafecd56fe3a7fe8e5e5cabbbbf9
Name: 3.txt. Checksum: 37bb2e5563e94eee68fac6b07501c44f018599482e897a626a94dd88053b4b7e
However, if we print the values of checksumMaps[0] to a file,
the value checksumMaps[0].filename gets overwritten (with the last 2 bytes of the checksum string) as seen by:
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
The program writes aftxt to stdout instead of 2.txt.
Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Minimal Reproducible Example
ArchiveFile.h
typedef struct ArchiveFile{
char *uuid;
char *checksum;
char *relative_path;
int is_binary;
} ArchiveFile;
typedef struct file_content{
unsigned char* bytes;
unsigned long file_size;
} file_content;
void set_uuid(ArchiveFile *file, char* uuid);
char* get_absolute_path(ArchiveFile *file, char* root);
char* get_file_text(ArchiveFile *file, char* root);
void get_bytes(ArchiveFile *file, char* root, unsigned char *buffer, size_t fsize);
long get_file_size(ArchiveFile *file, char *root);
ArchiveFile.c
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include "ArchiveFile.h"
#include <string.h>
void set_uuid(ArchiveFile* file, char* uuid){
file->uuid = uuid;
}
char* get_absolute_path(ArchiveFile *file, char* root){
/* Allocate space according to the relative path +
the root path + null terminating byte.*/
char* absolute_path = malloc(strlen(file->relative_path) + strlen(root) + 1);
// Add the root path.
strcpy(absolute_path, root);
// Concatonate the root with the rest of the path.
strcat(absolute_path, file->relative_path);
return absolute_path;
}
char* get_file_text(ArchiveFile *file, char* root){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "r");
if(fp == NULL)
printf("Could not open file %s \n", absolute_path);
// Platform independent way of getting the file size in bytes.
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
char *buffer = malloc(fsize);
if(fp){
fread(buffer, sizeof(char), fsize, fp);
}
fclose(fp);
free(absolute_path);
return buffer;
}
void print_bytes2(unsigned char* md, size_t size){
for (size_t i = 0; i < size; i++) {
printf("%02x ", md[i]);
}
printf("\n");
}
void get_bytes(ArchiveFile *file, char *root, unsigned char *buffer, size_t fsize){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "rb");
if(fp){
fread(buffer, 1, fsize, fp);
}
free(absolute_path);
fclose(fp);
}
long get_file_size(ArchiveFile *file, char *root){
char* filepath = get_absolute_path(file, root);
FILE *fp = fopen(filepath, "rb");
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
free(filepath);
fclose(fp);
return fsize;
}
checksum/checksum.h
// Used to store information about filename and checksum.
typedef struct ChecksumMap{
char* filename;
unsigned char checksum [32];
} ChecksumMap;
int calculate_checksum(void* input, unsigned long length, unsigned char* md);
checksum/checksum.h
#include <stdio.h>
#include <openssl/sha.h>
#include "checksum.h"
int calculate_checksum(void* input, unsigned long length, unsigned char* md){
SHA256_CTX context;
if(!SHA256_Init(&context))
return 0;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return 0;
if(!SHA256_Final(md, &context))
return 0;
return 1;
}
main.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include "ArchiveFile.h"
#include "checksum/checksum.h"
void format_bytes(char* buffer, unsigned char* md){
for (int i = 0; i < 32; i++) {
sprintf(&buffer[i*2], "%02x", md[i]);
}
buffer[32*2] = '\0';
}
void *listdir(char *name, int count, ChecksumMap *checksumMaps)
{
DIR *dir;
struct dirent *direntry;
if (!(dir = opendir(name)))
return NULL;
while ((direntry = readdir(dir)) != NULL) {
// If we reach a directory (that is not . or ..) then recursive step.
if (direntry->d_type == DT_DIR) {
char path[1024];
if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, direntry->d_name);
listdir(path, count, checksumMaps);
} else {
unsigned char md[32];
ArchiveFile file;
file.relative_path = direntry->d_name;
// Get the full path of the file:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name));
strcat(parent_name, name);
strcat(parent_name, "/");
size_t fsize = get_file_size(&file, parent_name);
unsigned char *bytes = malloc(sizeof(char) * fsize);
get_bytes(&file, parent_name, bytes, fsize);
calculate_checksum((void*) bytes, fsize, md);
ChecksumMap checksumMap = {.filename=file.relative_path};
memcpy(checksumMap.checksum, md,
sizeof(checksumMap.checksum));
free(bytes);
}
}
closedir(dir);
return NULL;
}
int main(int argc, char const *argv[]) {
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
}
Compile with gcc:
gcc -Wall -Wextra main.c ArchiveFile.c checksum/checksum.c -lcrypto
The program writes aftxt to stdout instead of 2.txt. Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Hard to say, because we're in the domain of UB (undefined behavior). But there are two obvious candidates here.
formatted_bytes is not properly terminated, causing fputs to read past the array, invoking UB.
fp is not a valid stream. The reason could be that it's not initialized, or changed, or the stream is closed or something.
Enable -Wall -Wextra -fsanitize=address. You could also try -fsanitize=undefined.
Check all return values. malloc, fopen and fputs returns a value that can be used for error checking.
Replace formatted_bytes with a hardcoded string that have the value you think it has.
Learn how to create a Minimal, Reproducible Example and how to debug small c programs. It's a guide I wrote a while ago.
Update
It seems that there was some different problems with the code.
First thing to notice is file.relative_path = direntry->d_name;, however the value that direntry points to changes in each iteration, thus the value file.relative_path points to, also changes. Furthermore, the size of the string stored in file.relative_path has never been specified, which would be a problem, if we use strcpy.
The solution is to specify a size for file.relative_path and use strcpy to copy the value of direntry->d_name. Also, no need for the checksumMap struct, since ArchiveFile already can store the same information (again, specify a size for the checksum).
Thing to keep in mind when you work with strings, buffers, arrays in C:
Remember that strings in C are based on char arrays, themselves based on a pointer to the first element. Assigning the value of one string to another might return in unexpected behavior when you actually want to copy the value of the string, not the address to the first element.
One bug here:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name)); // could have been parent_name[0]='\0'; instead
strcat(parent_name, name); // Now the parent_name buffer is full and null terminated.
strcat(parent_name, "/"); // this overwrites the null terminator and writes a new one out-of-bounds
You should have done something like this:
size_t length = strlen(name);
char parent_name[length+1+1];
memcpy(parent_name, name, length); // copies characters only (fast) but not the null term
parent_name[length] = '/'; // append this single character 1 symbol past "name" string
parent_name[length+1] = '\0'; // add manual null termination

LzmaLib: compress / decompress buffer in C

I'm trying to use LzmaLib's LzmaCompress() and LzmaDecompress() with buffers, adapting the examples provided here.
I'm testing with a ~3MB buffer and the compression function seems to work fine (produces a ~1.2MB compressed buffer), but when I try to decompress, it just extracts ~300 bytes and returns SZ_ERROR_DATA.
The few extracted bytes are right, but I don't know why it stops there.
My code:
#include <stdio.h>
#include <stdlib.h>
#include "LzmaLib.h"
void compress(
unsigned char **outBuf, size_t *dstLen,
unsigned char *inBuf, size_t srcLen)
{
unsigned propsSize = LZMA_PROPS_SIZE;
*dstLen = srcLen + srcLen / 3 + 128;
*outBuf = (unsigned char*)malloc(propsSize + *dstLen);
int res = LzmaCompress(
(unsigned char*)(*outBuf + LZMA_PROPS_SIZE), dstLen,
inBuf, srcLen,
*outBuf, &propsSize,
-1, 0, -1, -1, -1, -1, -1);
assert(res == SZ_OK);
*dstLen = *dstLen + LZMA_PROPS_SIZE;
}
void uncompress(
unsigned char **outBuf, size_t *dstLen,
unsigned char *inBuf, size_t srcLen
) {
*dstLen = 5000000;
*outBuf = (unsigned char*)malloc(*dstLen);
srcLen = srcLen - LZMA_PROPS_SIZE;
int res = LzmaUncompress(
*outBuf, dstLen,
(unsigned char*)(inBuf + LZMA_PROPS_SIZE), &srcLen,
inBuf, LZMA_PROPS_SIZE);
assert(res == SZ_OK);
}
void do_compress() {
FILE* file = fopen("Module.dll", "r");
size_t size, decSize;
unsigned char *data, *dec = NULL;
fseek(file, 0L, SEEK_END);
size = ftell(file);
fseek(file, 0L, SEEK_SET);
data = (unsigned char*)malloc(size);
fread(data, 1, size, file);
fclose(file);
compress((unsigned char**)&dec, &decSize, data, size);
file = fopen("Module.lzma", "w");
fwrite(dec, 1, decSize, file);
fclose(file);
}
void do_uncompress() {
FILE* file = fopen("Module.lzma", "r");
size_t size, decSize;
unsigned char *data, *dec = NULL;
fseek(file, 0L, SEEK_END);
size = ftell(file);
fseek(file, 0L, SEEK_SET);
data = (unsigned char*)malloc(size);
fread(data, 1, size, file);
fclose(file);
uncompress((unsigned char**)&dec, &decSize, data, size);
file = fopen("Module_DEC.dll", "w");
fwrite(dec, 1, decSize, file);
fclose(file);
}
int main()
{
do_compress();
do_uncompress();
return 0;
}
If this code is not the better way to compress buffers with LzmaLib, I'm happy to accept suggestions.
I bet the problem lurks in how you read/write your files. You need to open them in binary mode to prevent any substitutions during read/write operations.
Change all instances of:
fopen(xxx, "r") -> fopen(xxx, "rb")
fopen(xxx, "w") -> fopen(xxx, "wb")
I didn't check this specificly for LzmaCompress but most of the other compressing libraries like libz handle that function similar to the standard read/write or fread/fwrite functions, i.e. allowing you to continuously calling the functions to compress more and more data in one stream. So at some point, you will have to say "I'm done, please flush everything not written so far". Possibly, you forgot that part. If not, a Minimal, Complete, and Verifiable example would be cool.
When you compress, you pass the number of compressed output bytes to the caller. But your buffer contains LZMA_PROPS_SIZE additional bytes. So, when writing the lzma file, you actually forget the last LZMA_PROPS_SIZE bytes and on later reading, those are missing.

C: Zlib compress not working

I'm trying a very simple thing: read a minimal text file and compress it with the compress() utility from zlib. I think I've done everything fine, I allocate filesize * 10 for the output, it should be more that enough, but I keep getting -5 (Z_BUF_ERROR) as result of the operation.
Any help?
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
#define FILE_TO_OPEN "text.txt"
static char* readcontent(const char *filename, int* size)
{
char* fcontent = NULL;
int fsize = 0;
FILE* fp = fopen(filename, "r");
if(fp) {
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
rewind(fp);
fcontent = (char*) malloc(sizeof(char) * fsize);
fread(fcontent, 1, fsize, fp);
fclose(fp);
}
*size = fsize;
return fcontent;
}
int main(int argc, char const *argv[])
{
int input_size;
char* content_of_file = readcontent(FILE_TO_OPEN, &input_size);
printf("%d\n", input_size);
uLongf compressed_data_size;
char* compressed_data = malloc(sizeof(char) * (input_size * 10));
int result = compress((Bytef*) compressed_data, (uLongf*)&compressed_data_size, (const Bytef*)content_of_file, (uLongf)input_size);
printf("%d\n", result);
return 0;
}
Use fopen(filename, "rb"). If you are on Windows that b is important to avoid corruption of binary data.
Use compressBound() in zlib instead of input_size * 10 and set compressed_data_size before calling compress(). (You do not need to and should not write your own compressBound().)
Try
uLongf compressed_data_size = compressBound(input_size);
compressBound should be available in zlib.
Also you are better of probably using rb in fopen like I mentioned in my comment before.

Reading a text file in C

I am reading a text file and trying to display its contents on the console. Here is my code:
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include <fstream>
int main()
{
FILE* fp=NULL;
char buff[100];
fp=fopen("myfile.txt","r");
if(fp==NULL)
{
printf("Couldn't Open the File!!!\n");
}
fseek(fp, 0, SEEK_END);
size_t file_size = ftell(fp);
fread(buff,file_size,1,fp);
printf("Data Read [%s]",buff);
fclose(fp);
return 0;
}
but only redundant data is being displayed on the console; could someone please point out my mistake?
You forgot to reset the file pointer to start after doing this.
fseek(fp, 0, SEEK_END);
Do this after finding size (file_size).
rewind (fp);
You need to seek back to the start of the file before reading:
int main()
{
FILE* fp=NULL;
char buff[100];
fp=fopen("myfile.txt","r");
if(fp==NULL)
{
printf("Couldn't Open the File!!!\n");
exit(1); // <<< handle fopen failure
}
fseek(fp, 0, SEEK_END);
size_t file_size = ftell(fp);
fseek(fp, 0, SEEK_SET); // <<< seek to start of file
fread(buff,file_size,1,fp);
printf("Data Read [%s]",buff);
fclose(fp);
return 0;
}
Try it....
#include <stdio.h>
#include <stdlib.h>
void handle_line(char *line) {
printf("%s", line);
}
int main(int argc, char *argv[]) {
int size = 1024, pos;
int c;
char *buffer = (char *)malloc(size);
FILE *f = fopen("myfile.txt", "r");
if(f) {
do { // read all lines in file
pos = 0;
do{ // read one line
c = fgetc(f);
if(c != EOF) buffer[pos++] = (char)c;
if(pos >= size - 1) { // increase buffer length - leave room for 0
size *=2;
buffer = (char*)realloc(buffer, size);
}
}while(c != EOF && c != '\n');
buffer[pos] = 0;
// line is now in buffer
handle_line(buffer);
} while(c != EOF);
fclose(f);
}
free(buffer);
return 0;
}
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include <fstream>
int main()
{
FILE* fp=NULL;
char *buff; //change array to pointer
fp=fopen("myfile.txt","r");
if(fp==NULL)
{
printf("Couldn't Open the File!!!\n");
}
fseek(fp, 0, SEEK_END);
size_t file_size = ftell(fp);
buff = malloc(file_size); //allocating memory needed for reading file data
fseek(fp,0,SEEK_SET); //changing fp to point start of file data
fread(buff,file_size,1,fp);
printf("Data Read [%s]",buff);
fclose(fp);
return 0;
}
having a buffer of 100 bytes to read a file is not a better idea as since the file size may be more than 100 bytes.
A better file io can be done by doing a fgets on the file, if its not a type of metadata that you wanted to read using the fread.
fgets in a while loop can be used to check whether its reached EOF or a feof call can be used to check the EOF.
a sample code listing of fgets can be like this:
while (fgets(buf, len, fp)) {
printf("%s", buf);
}
or a sample that is used with fgets can be like this:
while (fread(buf, len, 1, fp) >= 0) {
printf("%s\n", buf);
}

Read file into memory?

I can seem to only read file into memory if I explicitly declare the buffer size. This works
#include <stdio.h>
int main(){
FILE *fp = fopen("test.log", "rb");
char buffer[37];
fread(buffer, 1, 36, fp);
printf("%s", buffer);
}
This will add junk to the output
#include <stdio.h>
int main(){
FILE *fp = fopen("test.log", "rb");
fseek(fp, 0, SEEK_END);
long siz = ftell(fp);
rewind(fp);
char buffer[siz + 1];
fread(buffer, 1, siz, fp);
printf("%s", buffer);
}
insert buffer[siz]='\0'; before printf("%s", buffer);
Try a different approach - use a "memory map". What it does is it allows you to access the file as if it was a memory block. This can dramatically improve performance while simplifying your code at the same time.
Read more about it at http://en.wikipedia.org/wiki/Mmap

Resources