I'm trying a very simple thing: read a minimal text file and compress it with the compress() utility from zlib. I think I've done everything fine, I allocate filesize * 10 for the output, it should be more that enough, but I keep getting -5 (Z_BUF_ERROR) as result of the operation.
Any help?
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
#define FILE_TO_OPEN "text.txt"
static char* readcontent(const char *filename, int* size)
{
char* fcontent = NULL;
int fsize = 0;
FILE* fp = fopen(filename, "r");
if(fp) {
fseek(fp, 0, SEEK_END);
fsize = ftell(fp);
rewind(fp);
fcontent = (char*) malloc(sizeof(char) * fsize);
fread(fcontent, 1, fsize, fp);
fclose(fp);
}
*size = fsize;
return fcontent;
}
int main(int argc, char const *argv[])
{
int input_size;
char* content_of_file = readcontent(FILE_TO_OPEN, &input_size);
printf("%d\n", input_size);
uLongf compressed_data_size;
char* compressed_data = malloc(sizeof(char) * (input_size * 10));
int result = compress((Bytef*) compressed_data, (uLongf*)&compressed_data_size, (const Bytef*)content_of_file, (uLongf)input_size);
printf("%d\n", result);
return 0;
}
Use fopen(filename, "rb"). If you are on Windows that b is important to avoid corruption of binary data.
Use compressBound() in zlib instead of input_size * 10 and set compressed_data_size before calling compress(). (You do not need to and should not write your own compressBound().)
Try
uLongf compressed_data_size = compressBound(input_size);
compressBound should be available in zlib.
Also you are better of probably using rb in fopen like I mentioned in my comment before.
Related
Description:
I have created a small program that stores the name and checksum of a file in a struct, for each file in a directory. When output is written to stdout with printf, everything seems fine, but if we write to a file with either fputs or fprintf, values get overwritten, perhaps because of some buffer overflow?
Output from main with print.
Name: 2.txt. Checksum: fc769d448ed4e08bd855927bad2c8e43efdf5315a6daa9f28577758786d52eaf
Name: 1.txt. Checksum: 2d46cffd0302c5537ddb4952a9cca7d66060dafecd56fe3a7fe8e5e5cabbbbf9
Name: 3.txt. Checksum: 37bb2e5563e94eee68fac6b07501c44f018599482e897a626a94dd88053b4b7e
However, if we print the values of checksumMaps[0] to a file,
the value checksumMaps[0].filename gets overwritten (with the last 2 bytes of the checksum string) as seen by:
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
The program writes aftxt to stdout instead of 2.txt.
Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Minimal Reproducible Example
ArchiveFile.h
typedef struct ArchiveFile{
char *uuid;
char *checksum;
char *relative_path;
int is_binary;
} ArchiveFile;
typedef struct file_content{
unsigned char* bytes;
unsigned long file_size;
} file_content;
void set_uuid(ArchiveFile *file, char* uuid);
char* get_absolute_path(ArchiveFile *file, char* root);
char* get_file_text(ArchiveFile *file, char* root);
void get_bytes(ArchiveFile *file, char* root, unsigned char *buffer, size_t fsize);
long get_file_size(ArchiveFile *file, char *root);
ArchiveFile.c
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include "ArchiveFile.h"
#include <string.h>
void set_uuid(ArchiveFile* file, char* uuid){
file->uuid = uuid;
}
char* get_absolute_path(ArchiveFile *file, char* root){
/* Allocate space according to the relative path +
the root path + null terminating byte.*/
char* absolute_path = malloc(strlen(file->relative_path) + strlen(root) + 1);
// Add the root path.
strcpy(absolute_path, root);
// Concatonate the root with the rest of the path.
strcat(absolute_path, file->relative_path);
return absolute_path;
}
char* get_file_text(ArchiveFile *file, char* root){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "r");
if(fp == NULL)
printf("Could not open file %s \n", absolute_path);
// Platform independent way of getting the file size in bytes.
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
char *buffer = malloc(fsize);
if(fp){
fread(buffer, sizeof(char), fsize, fp);
}
fclose(fp);
free(absolute_path);
return buffer;
}
void print_bytes2(unsigned char* md, size_t size){
for (size_t i = 0; i < size; i++) {
printf("%02x ", md[i]);
}
printf("\n");
}
void get_bytes(ArchiveFile *file, char *root, unsigned char *buffer, size_t fsize){
char* absolute_path = get_absolute_path(file, root);
FILE *fp = fopen(absolute_path, "rb");
if(fp){
fread(buffer, 1, fsize, fp);
}
free(absolute_path);
fclose(fp);
}
long get_file_size(ArchiveFile *file, char *root){
char* filepath = get_absolute_path(file, root);
FILE *fp = fopen(filepath, "rb");
fseek(fp, 0, SEEK_END);
long fsize = ftell(fp);
fseek(fp, 0, SEEK_SET); /* same as rewind(f); */
free(filepath);
fclose(fp);
return fsize;
}
checksum/checksum.h
// Used to store information about filename and checksum.
typedef struct ChecksumMap{
char* filename;
unsigned char checksum [32];
} ChecksumMap;
int calculate_checksum(void* input, unsigned long length, unsigned char* md);
checksum/checksum.h
#include <stdio.h>
#include <openssl/sha.h>
#include "checksum.h"
int calculate_checksum(void* input, unsigned long length, unsigned char* md){
SHA256_CTX context;
if(!SHA256_Init(&context))
return 0;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return 0;
if(!SHA256_Final(md, &context))
return 0;
return 1;
}
main.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <sys/types.h>
#include "ArchiveFile.h"
#include "checksum/checksum.h"
void format_bytes(char* buffer, unsigned char* md){
for (int i = 0; i < 32; i++) {
sprintf(&buffer[i*2], "%02x", md[i]);
}
buffer[32*2] = '\0';
}
void *listdir(char *name, int count, ChecksumMap *checksumMaps)
{
DIR *dir;
struct dirent *direntry;
if (!(dir = opendir(name)))
return NULL;
while ((direntry = readdir(dir)) != NULL) {
// If we reach a directory (that is not . or ..) then recursive step.
if (direntry->d_type == DT_DIR) {
char path[1024];
if (strcmp(direntry->d_name, ".") == 0 || strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, direntry->d_name);
listdir(path, count, checksumMaps);
} else {
unsigned char md[32];
ArchiveFile file;
file.relative_path = direntry->d_name;
// Get the full path of the file:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name));
strcat(parent_name, name);
strcat(parent_name, "/");
size_t fsize = get_file_size(&file, parent_name);
unsigned char *bytes = malloc(sizeof(char) * fsize);
get_bytes(&file, parent_name, bytes, fsize);
calculate_checksum((void*) bytes, fsize, md);
ChecksumMap checksumMap = {.filename=file.relative_path};
memcpy(checksumMap.checksum, md,
sizeof(checksumMap.checksum));
free(bytes);
}
}
closedir(dir);
return NULL;
}
int main(int argc, char const *argv[]) {
FILE *fp = fopen("mychecksums.txt", "w");
char formatted_bytes[32*2+1];
char *filename = checksumMaps[0].filename;
format_bytes(formatted_bytes, checksumMaps[0].checksum);
fputs(filename, fp);
fputs(formatted_bytes, fp);
// We print the value of `filename` again in order to see that it has been overwritten.
printf("%s \n", filename);
fclose(fp);
}
Compile with gcc:
gcc -Wall -Wextra main.c ArchiveFile.c checksum/checksum.c -lcrypto
The program writes aftxt to stdout instead of 2.txt. Using gdb, I can see that the value of filename changes from 2.txt to aftxt after the line fputs(formatted_bytes, fp);. What could be the reason for this?
Hard to say, because we're in the domain of UB (undefined behavior). But there are two obvious candidates here.
formatted_bytes is not properly terminated, causing fputs to read past the array, invoking UB.
fp is not a valid stream. The reason could be that it's not initialized, or changed, or the stream is closed or something.
Enable -Wall -Wextra -fsanitize=address. You could also try -fsanitize=undefined.
Check all return values. malloc, fopen and fputs returns a value that can be used for error checking.
Replace formatted_bytes with a hardcoded string that have the value you think it has.
Learn how to create a Minimal, Reproducible Example and how to debug small c programs. It's a guide I wrote a while ago.
Update
It seems that there was some different problems with the code.
First thing to notice is file.relative_path = direntry->d_name;, however the value that direntry points to changes in each iteration, thus the value file.relative_path points to, also changes. Furthermore, the size of the string stored in file.relative_path has never been specified, which would be a problem, if we use strcpy.
The solution is to specify a size for file.relative_path and use strcpy to copy the value of direntry->d_name. Also, no need for the checksumMap struct, since ArchiveFile already can store the same information (again, specify a size for the checksum).
Thing to keep in mind when you work with strings, buffers, arrays in C:
Remember that strings in C are based on char arrays, themselves based on a pointer to the first element. Assigning the value of one string to another might return in unexpected behavior when you actually want to copy the value of the string, not the address to the first element.
One bug here:
char parent_name[strlen(name)+1];
memset(&parent_name[0], 0, sizeof(parent_name)); // could have been parent_name[0]='\0'; instead
strcat(parent_name, name); // Now the parent_name buffer is full and null terminated.
strcat(parent_name, "/"); // this overwrites the null terminator and writes a new one out-of-bounds
You should have done something like this:
size_t length = strlen(name);
char parent_name[length+1+1];
memcpy(parent_name, name, length); // copies characters only (fast) but not the null term
parent_name[length] = '/'; // append this single character 1 symbol past "name" string
parent_name[length+1] = '\0'; // add manual null termination
I am writing a program in C. I use low level functions like open, read, close. I have a file descriptor, etc, but I don't know how to print only the first 2 lines from a file that has e.g. 30 lines of text. how to do it?
you need to read a file into a string, iterate through the string, concat any character into the string variable, define a int variable for lines count, when lines reaches 2, break the loop
here’s an example how you can do it
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *get_first_two_lines(char *file_name) {
FILE *file = fopen(file_name, "r");
fseek(file, 0, SEEK_END);
long size = ftell(file);
fseek(file, 0, SEEK_SET);
char *buffer = malloc(size);
fread(buffer, 1, size, file);
char *two_lines = calloc(1, sizeof(char));
unsigned int lines = 0;
for (int i=0;i<strlen(buffer);i++) {
if (lines == 2) break;
if (buffer[i] == '\n') {
if (lines < 1) {
two_lines = realloc(two_lines, (strlen(two_lines) + 2) * sizeof(char));
strcat(two_lines, (char []) {'\n', 0});
}
lines++;
continue;
}
two_lines = realloc(two_lines, (strlen(two_lines) + 2) * sizeof(char));
strcat(two_lines, (char []) {buffer[i], 0});
}
return two_lines;
}
int main(int argc, char *argv[]) {
char *first_two_lines = get_first_two_lines("file_name");
printf("%s", first_two_lines);
return 0;
}
I tried to read binary files into dinamic string and somthing go wrong.
I cant set free the string and i cant print or do anything else with it.
The files are OK if I just open it without all the dinamic stuff it runs well.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma warning (disable: 4996)
#define STR_LEN 50
int main(int args, char** argv)
{
char filePath[STR_LEN];
char signaturePath[STR_LEN];
FILE* file;
FILE* signature;
int fileSize;
int signatureSize;
strcpy(filePath, argv[2]);
strcpy(signaturePath, argv[1]);
file = fopen(filePath, "rb");
signature = fopen(signaturePath, "rb");
if (file == NULL)
printf("e: f\n");
if (signature == NULL)
printf("e: s\n");
fseek(file, 0L, SEEK_END);
fileSize = ftell(file);
fseek(file, 0L, SEEK_SET);
fseek(signature, 0L, SEEK_END);
signatureSize = ftell(signature);
fseek(signature, 0L, SEEK_SET);
char* fileStr = (char)malloc(sizeof(char) * fileSize + 1);
char* signatureStr = (char)malloc(sizeof(char) * signatureSize + 1);
fread(fileStr, fileSize, 1, file);
fread(signatureStr, signatureSize, 1, signature);
free(fileStr);
free(signatureStr);
fclose(file);
fclose(signature);
return 0;
}
You are casting the pointers that malloc() returns to char. In typical environment, char is 1-byte long while pointers are 4-byte or 8-byte long. The cast will truncate the pointers, turning them to some invalid value.
Casting results of malloc() family is considered as a bad practice. Remove the harmful casts to fix.
I'm trying to use LzmaLib's LzmaCompress() and LzmaDecompress() with buffers, adapting the examples provided here.
I'm testing with a ~3MB buffer and the compression function seems to work fine (produces a ~1.2MB compressed buffer), but when I try to decompress, it just extracts ~300 bytes and returns SZ_ERROR_DATA.
The few extracted bytes are right, but I don't know why it stops there.
My code:
#include <stdio.h>
#include <stdlib.h>
#include "LzmaLib.h"
void compress(
unsigned char **outBuf, size_t *dstLen,
unsigned char *inBuf, size_t srcLen)
{
unsigned propsSize = LZMA_PROPS_SIZE;
*dstLen = srcLen + srcLen / 3 + 128;
*outBuf = (unsigned char*)malloc(propsSize + *dstLen);
int res = LzmaCompress(
(unsigned char*)(*outBuf + LZMA_PROPS_SIZE), dstLen,
inBuf, srcLen,
*outBuf, &propsSize,
-1, 0, -1, -1, -1, -1, -1);
assert(res == SZ_OK);
*dstLen = *dstLen + LZMA_PROPS_SIZE;
}
void uncompress(
unsigned char **outBuf, size_t *dstLen,
unsigned char *inBuf, size_t srcLen
) {
*dstLen = 5000000;
*outBuf = (unsigned char*)malloc(*dstLen);
srcLen = srcLen - LZMA_PROPS_SIZE;
int res = LzmaUncompress(
*outBuf, dstLen,
(unsigned char*)(inBuf + LZMA_PROPS_SIZE), &srcLen,
inBuf, LZMA_PROPS_SIZE);
assert(res == SZ_OK);
}
void do_compress() {
FILE* file = fopen("Module.dll", "r");
size_t size, decSize;
unsigned char *data, *dec = NULL;
fseek(file, 0L, SEEK_END);
size = ftell(file);
fseek(file, 0L, SEEK_SET);
data = (unsigned char*)malloc(size);
fread(data, 1, size, file);
fclose(file);
compress((unsigned char**)&dec, &decSize, data, size);
file = fopen("Module.lzma", "w");
fwrite(dec, 1, decSize, file);
fclose(file);
}
void do_uncompress() {
FILE* file = fopen("Module.lzma", "r");
size_t size, decSize;
unsigned char *data, *dec = NULL;
fseek(file, 0L, SEEK_END);
size = ftell(file);
fseek(file, 0L, SEEK_SET);
data = (unsigned char*)malloc(size);
fread(data, 1, size, file);
fclose(file);
uncompress((unsigned char**)&dec, &decSize, data, size);
file = fopen("Module_DEC.dll", "w");
fwrite(dec, 1, decSize, file);
fclose(file);
}
int main()
{
do_compress();
do_uncompress();
return 0;
}
If this code is not the better way to compress buffers with LzmaLib, I'm happy to accept suggestions.
I bet the problem lurks in how you read/write your files. You need to open them in binary mode to prevent any substitutions during read/write operations.
Change all instances of:
fopen(xxx, "r") -> fopen(xxx, "rb")
fopen(xxx, "w") -> fopen(xxx, "wb")
I didn't check this specificly for LzmaCompress but most of the other compressing libraries like libz handle that function similar to the standard read/write or fread/fwrite functions, i.e. allowing you to continuously calling the functions to compress more and more data in one stream. So at some point, you will have to say "I'm done, please flush everything not written so far". Possibly, you forgot that part. If not, a Minimal, Complete, and Verifiable example would be cool.
When you compress, you pass the number of compressed output bytes to the caller. But your buffer contains LZMA_PROPS_SIZE additional bytes. So, when writing the lzma file, you actually forget the last LZMA_PROPS_SIZE bytes and on later reading, those are missing.
I'm really new at C programing, so I need to read a big file, split it every point, and write in a new file what I got when splitting, so I have to write several files, the problem is when I'm naming the new files.
I've been working in this proyect like for a week and I can't fix this issue.
Here the code I have so far:
#include <stdio.h>
#include <string.h>
char *myRead(char file_name[]){
char *output;
long size;
FILE *myfile = fopen(file_name,"rb");
fseek(myfile, 0, SEEK_END);
size = ftell(myfile);
rewind(myfile);
output = (char*) malloc(sizeof(char) * size);
fread(output,1,size,myfile);
fclose(myfile);
return output;
}
void myWrite(char content[], int i){
FILE *myfile;
myfile = fopen(i,"w");
fprintf(myfile,"%s",content);
fclose(myfile);
}
void split(char *content){
int word_length = strlen(content);
int i = 0;
char *output = strtok (content,".");
while (output != NULL){
myWrite(output,i);
printf("%s\n", output);
output = strtok (NULL, ".");
i++;
}
}
int main(){
char file_name[] = "hourglass.txt";
char *content = myRead(file_name);
split(content);
return 0;
}
What I want to know it's how can I do several files with just a number for the name?
Change
myfile = fopen(i,"w");
to
char file_name[100];
sprintf(filename, "%d", i);
myfile = fopen(file_name, "w");
That should fix it for you
C's fopen has signature:
FILE* fopen(const char *filename, const char *mode)
when you call with i in myWrite you are telling it a string is located at that address, where likely there is garbage. If this isn't homework explain and I can elaborate but in case it is I'll just point you to itoa.