C++ : how to get the uncompressed size using zlib or lib_tar - zlib

I tried the below code, but it is very slow,
Looking for any optimal way to caluclate the uncompressed data size.
I read somewhere using zstream structure -> inflate api we can speedup the performance. could you please help me to get it done.
/* skip regfile */
int move_file_pointer(TAR *t)
{
int i, k;
size_t size;
char buf[T_BLOCKSIZE];
if (!TH_ISREG(t))
{
errno = EINVAL;
return -1;
}
size = th_get_size(t);
std::printf("\n Current File Position - %lu", gztell(l_gzFile));
long int luOffset= 0;
int reminder = size % T_BLOCKSIZE ;
if(reminder == 0 )
{
luOffset = (size/T_BLOCKSIZE) * T_BLOCKSIZE;
}
else
{
luOffset = ((size/T_BLOCKSIZE) * T_BLOCKSIZE) + T_BLOCKSIZE ;
}
std::printf("\n Targeted Seek offset value %lu", luOffset);
std::printf("\n Targeted Seek offset value %lu", luOffset);
/*
######################################################
If you look here gzseek is taking long time to complete. for small zip file such as 500 MB, it consuming 2 mins to move end of file.
I have TAR *t datatype here.
gzFile l_gzFile file pointer here.
Using these two data types.
How can i seek to end in fast / optimal way.
##################################################### */
k = gzseek(l_gzFile, luOffset , SEEK_CUR); // l_gzFile is from gzFile data type.
if (k == -1)
{
if (k != -1)
errno = EINVAL;
return -1;
}
std::printf("\n After Read Block - %lu", gztell(l_gzFile));
return 0;
}
void getUnTarFileSize(std::string f_cSourePath)
{
std::string dest = "/fs/usb0/untar_zlib/test/";
TAR *l_pTarInfo = NULL;
char *l_pcTarFileSourcePath = const_cast<char * >(f_cSourePath.c_str());
char *l_pcTarFileDestPath = const_cast<char * >(dest.c_str());
//open tar archive
if (0 != (tar_open(&l_pTarInfo, l_pcTarFileSourcePath, &gztype, O_RDONLY, 0, TAR_GNU)))
{
std::printf("tar_open(): %s \n", std::strerror(errno));
}
else
{
int i = 0;
unsigned long totalSize = 0;
unsigned long current_size = 0;
std::printf("\n Current File Position - %lu \n", gztell(l_gzFile));
while ((i = th_read(l_pTarInfo)) == 0)
{
char *fName = th_get_pathname(l_pTarInfo);
current_size = th_get_size(l_pTarInfo);
printf("\n Size of fName %s = %d", fName,current_size);
totalSize += current_size;
if (TH_ISREG(l_pTarInfo) && (move_file_pointer(l_pTarInfo) != 0)) {
fprintf(stderr, "tar_skip_regfile()\n");
printf("\n Value of read=%d, Error=%s\n",i,std::strerror(errno));
break;
}
fName = NULL;
printf("\n\n");
}
if(-1 == i)
{
printf("\n Value of read=%d, Error=%s\n",i,std::strerror(errno));
}
else
{
printf("\n Total Size of given zip file=%d\n",totalSize);
}
}
}
int main()
{
getUnTarFileSize("/fs/usb0/untar_zlib/a.tar.gz");
}

If you are asking how to know the uncompressed size of the contents of a gzip (.gz) file, then the only reliable way is to decompress it. See this answer here for more details.

Related

malloc error shows"corrupted size vs. prev_size" [duplicate]

This question already has answers here:
Why is “while( !feof(file) )” always wrong?
(5 answers)
How to get the string size in bytes?
(6 answers)
What are the valid signatures for C's main() function?
(5 answers)
The Definitive C Book Guide and List
(1 answer)
Closed 1 year ago.
I am trying to write code to test the speed of lz4 compress and decompress
However, when I add the decompress function in the code, the error "corrupted size vs. prev_size" starts to show up.
In my mind, I think the problems should be in the calloc function of src. Because the program stops at free(src) when running debug.
Her is my code.
(the lz4.h and lz4.c is on https://github.com/lz4/lz4/tree/355f60952938e5bd4d45118af720d4b8fb0c8a09
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lz4.h"
int splitFile(char* fileIn, size_t maxSize);
#include "lz4.c"
int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
int LZ4_compressBound(int inputSize);
int main()
{
clock_t start_lz4_c,end_lz4_c,start_lz4_d,end_lz4_d;
double compress_time,large_time,decompress_time,compress_speed,decompress_speed,small_time_c,small_average_time_c,small_time_d,small_average_time_d;
int64_t num_f=0;
int64_t i;
char * temp_src=0;
int srcSize= 0;
int dstCapacity=16384;
int lz4_compressed_data_size=0,lz4_decompressed_data_size=0;
int size_after_comp=0;
int size_before_comp=0;
int size_of_file=0;
char* compressed_data = malloc((size_t)dstCapacity);
char buff[200];
FILE * fc;
FILE * ft;
//split file
num_f = splitFile("/home/ziruo/research/1stpro/test.txt",16384);
printf("num_f=%ld\n",num_f);
//read the length for file
ft = fopen("/home/ziruo/research/1stpro/test.txt","r");
fseek(ft,0,SEEK_END);
size_of_file = ftell(ft);
printf("file len = %d\n",size_of_file);
fclose(ft);
//main loop
for( i = 1; i <= num_f; i++)
{
char* src=calloc(16384,1);
char* regen_buffer = calloc(srcSize,1);
//read in
sprintf(buff,"/home/ziruo/research/1stpro/test.txt.%03ld",I);
if (compressed_data == NULL)
{
printf("faild to generae storage\n");
}
fc = fopen(buff,"r");
fread(src,16384,1,fc);
srcSize=(int)(strlen(src) + 1);
fclose(fc);
small_time_c = 0;
small_time_d = 0;
if(size_of_file <= 100000000)
{
int a;
for(a = 1;a<=10;a++)
{
start_lz4_c = clock();
dstCapacity= LZ4_compressBound(srcSize);
lz4_compressed_data_size = LZ4_compress_default(src,compressed_data,srcSize,dstCapacity);
end_lz4_c = clock();
start_lz4_d = clock();
lz4_decompressed_data_size = LZ4_decompress_safe(compressed_data,regen_buffer,lz4_compressed_data_size,srcSize);
end_lz4_d = clock();
small_time_c += ((double)(end_lz4_c-start_lz4_c))/CLOCKS_PER_SEC;
small_average_time_c = small_time_c/10;
small_time_d += ((double)(end_lz4_d-start_lz4_d))/CLOCKS_PER_SEC;
small_average_time_d = small_time_d/10;
}
printf("time %f\n",small_average_time_d);
compress_time += small_average_time_c;
decompress_time += small_average_time_d;
}
else
{
start_lz4_c = clock();
dstCapacity= LZ4_compressBound(srcSize);
LZ4_compress_default(src,compressed_data,srcSize,dstCapacity);
end_lz4_c = clock();
large_time = ((double)(end_lz4_c-start_lz4_c))/CLOCKS_PER_SEC;
//printf("time %f\n",large_time);
//compress_time += large_time;
}
//calculate time & speed
size_before_comp += srcSize;
size_after_comp += lz4_compressed_data_size;
printf("decompressed_data_size is %d\n",lz4_decompressed_data_size);
printf("decompression speed: %fMB/s\n",size_after_comp/(decompress_time *1000000));
free(src);
free(regen_buffer);
}
printf("before %d after %d\n",size_before_comp,size_after_comp);
printf("compression speed: %fMB/s\n",size_before_comp/(compress_time*1000000));
printf("compression ratio: %f\n",(float) size_before_comp/size_after_comp);
printf("time used(s): %f\n",compress_time);
printf("decompressed_data_size is %d\n",lz4_decompressed_data_size);
printf("decompression speed: %fMB/s\n",size_after_comp/(decompress_time *1000000));
return 0;
}
int splitFile(char* fileIn, size_t maxSize)
{
int result = 0;
FILE* fIn;
FILE* fOut;
char buffer[1024 * 16];
size_t size;
size_t read;
size_t written;
if ((fileIn != NULL) && (maxSize > 0))
{
fIn = fopen(fileIn, "rb");
if (fIn != NULL)
{
fOut = NULL;
result = 1; // we have at least one part
while (!feof(fIn))
{
// initialize (next) output file if no output file opened
if (fOut == NULL)
{
sprintf(buffer, "%s.%03d", fileIn, result);
fOut = fopen(buffer, "wb");
if (fOut == NULL)
{
result = -1;
break;
}
size = 0;
}
// calculate size of data to be read from input file in order to not exceed maxSize
read = sizeof(buffer);
if ((size + read) > maxSize)
{
read = maxSize - size;
}
// read data from input file
read = fread(buffer, 1, read, fIn);
if (read == 0)
{
result = -1;
break;
}
// write data to output file
written = fwrite(buffer, 1, read, fOut);
if (written != read)
{
result = -1;
break;
}
// update size counter of current output file
size += written;
if (size >= maxSize) // next split?
{
fclose(fOut);
fOut = NULL;
result++;
}
}
// clean up
if (fOut != NULL)
{
fclose(fOut);
}
fclose(fIn);
}
}
return (result);
}

Having malloc(): corrupted top size issue

I am trying to write code to test the speed of lz4 compression instead of using -b code in terminal.
I am using ubuntu Ubuntu 20.04.2 LTS. And visual studio ide.
Here is my code, it has some trouble when running the second time of the for loop in main.
This line:
fread(src,16384,1,fc);
When i = 1, it works as I expected. However, when i = 2, the malloc() problem just pop up.
//The split file function works perfectly.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lz4.h"
int splitFile(char* fileIn, size_t maxSize);
#include "lz4.c"
int main()
{
time_t start,end;
int compress, decompress;
int64_t num_f=0;
int64_t i;
char* src=malloc(16384);
int srcSize= 16385;//(int)(strlen(src) + 1);
int dstCapacity=0;
int compressed_data_size=0;
char* compressed_data = malloc((size_t)dstCapacity);
char buff[200];
FILE * fc;
num_f = splitFile("/home/ziruo/research/1stpro/test.txt",16384);
printf("num_f=%ld\n",num_f);
start = time(NULL);
for( i = 1; i <= num_f; i++)
{
sprintf(buff,"/home/ziruo/research/1stpro/test.txt.%03d",i);
printf("buff %s\n",&buff);
if (compressed_data == NULL)
{
printf("faild to generae storage\n");
}
fc = fopen(buff,"r");
printf("fc is: %d\n",fc);
fread(src,16384,1,fc);
srcSize=(int)(strlen(src) + 1);
dstCapacity= LZ4_compressBound(srcSize)
compressed_data_size = LZ4_compress_default(src,compressed_data,srcSize,dstCapacity);
LZ4_compress_default(src,compressed_data,srcSize,dstCapacity);
printf("data size ratio %.2f\n", (float)compressed_data_size/srcSize);
}
end = time(NULL);
printf("time used(s): %f\n",difftime(end,start));
return 0;
}
int splitFile(char* fileIn, size_t maxSize)
{
int result = 0;
FILE* fIn;
FILE* fOut;
char buffer[1024 * 16];
size_t size;
size_t read;
size_t written;
if ((fileIn != NULL) && (maxSize > 0))
{
fIn = fopen(fileIn, "rb");
if (fIn != NULL)
{
fOut = NULL;
result = 1; // we have at least one part
while (!feof(fIn))
{
// initialize (next) output file if no output file opened
if (fOut == NULL)
{
sprintf(buffer, "%s.%03d", fileIn, result);
fOut = fopen(buffer, "wb");
if (fOut == NULL)
{
result = -1;
break;
}
size = 0;
}
// calculate size of data to be read from input file in order to not exceed maxSize
read = sizeof(buffer);
if ((size + read) > maxSize)
{
read = maxSize - size;
}
// read data from input file
read = fread(buffer, 1, read, fIn);
if (read == 0)
{
result = -1;
break;
}
// write data to output file
written = fwrite(buffer, 1, read, fOut);
if (written != read)
{
result = -1;
break;
}
// update size counter of current output file
size += written;
if (size >= maxSize) // next split?
{
fclose(fOut);
fOut = NULL;
result++;
}
}
// clean up
if (fOut != NULL)
{
fclose(fOut);
}
fclose(fIn);
}
}
return (result);
}
And here is the picture of error
enter image description here

Compare two binary files in C

I am writing a program to compare two binary files and plot the first difference. I want to read 16 bytes of data from each file continuously and compare them. For that I am storing 16 bytes from both file into char *buffer1, buffer2. When I print the output I am getting that buffer1 has both the data of file1 and file2.
The code is as follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void printConversion(char *buf1, char *buf2) {
size_t len = strlen(buf1);
char *binary = malloc(len * 8 + 1);
binary[0] = '\0';
for (size_t i = 0; i < len; ++i) {
char ch = buf1[i];
for (int j = 7; j >= 0; --j) {
if (ch & (1 << j)) {
strcat(binary,"1");
} else {
strcat(binary,"0");
}
}
}
printf("File1: %s\t", binary);
free(binary);
printf("File2:");
for (int i = 0; i < sizeof(buf2); i++) {
printf("%x", buf2[i] - '0');
}
}
void fileRead(FILE *fp, char *buf, int count) {
fseek(fp, count, SEEK_SET);
fread(buf, 1, 16, fp);
}
int fileSize(FILE *fp) {
fseek(fp, 0, SEEK_END);
int size = ftell(fp) + 1;
return size;
}
int main(int argc, char *argv[]) {
printf("***Binary File Comparator***\n ");
int count = 0;
int index = 0;
char buffer1[16];
char buffer2[16];
char buffer3[16];
char buffer4[16];
// Invalid Number of Arguments
if (argc < 3 || argc > 3) {
printf("Invalid Number of Arguments\n");
}
FILE *fp1, *fp2;
fp1 = fopen(argv[1], "rb");
int size = fileSize(fp1);
int size1 = size;
fclose(fp1);
while (size > 1) {
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer1, count);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer2, count);
if (size1 < count) {
int lastSize = count - size1;
count = count + lastSize;
fclose(fp2);
} else {
count = count+16;
fclose(fp2);
}
**printf("buffer1:%s\tbuffer2:%s\n", buffer1, buffer2)**;
size = size - 16;
int result = strcmp(buffer1, buffer2);
if (result != 0) {
for (int i = 0; i < sizeof(buffer1); i++) {
if (buffer1[i] != buffer2[i]) {
int count1 = (count - 16) + i;
index++;
if (index == 1) {
printf("Byte_Offset:%x\n", count1);
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer3, count1);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer4, count1);
fclose(fp2);
printConversion(buffer3, buffer4);
break;
}
} else {
continue;
}
}
}
}
}
I have tried to highlight the printf part that is printing my buffer1 and buffer2
The output is as follows:
buffer1:83867715933586928386771593358692 buffer2:8386771593358692
buffer1:49216227905963264921622790596326 buffer2:4921622790596326
buffer1:40267236116867294026723611686729 buffer2:4026723611686729
buffer1:82306223673529228230622367352922 buffer2:8230622367352922
buffer1:25869679356114222586967935611422 buffer2:2586967935611422
Can anybody help what I am doing wrong. Please point me the error and what optimization changes could be done in code. I am at learning stage your feedback will be very helpful.
You are complicating the task by reading 16 bytes at a time. If the goal is to indicate the first difference, just read one byte at a time from both files with getc() this way:
int compare_files(FILE *fp1, FILE *fp2) {
unsigned long pos;
int c1, c2;
for (pos = 0;; pos++) {
c1 = getc(fp1);
c2 = getc(fp2);
if (c1 != c2 || c1 == EOF)
break;
}
if (c1 == c2) {
printf("files are identical and have %lu bytes\n", pos);
return 0; // files are identical
} else
if (c1 == EOF) {
printf("file1 is included in file2, the first %lu bytes are identical\n", pos);
return 1;
} else
if (c2 == EOF) {
printf("file2 is included in file1, the first %lu bytes are identical\n", pos);
return 2;
} else {
printf("file1 and file2 differ at position %lu: 0x%02X <> 0x%02X\n", pos, c1, c2);
return 3;
}
}
In terms of efficiency, reading one byte at a time does not pose a problem if the streams are buffered. For large files, you can get better performance by memory mapping the file contents if available on the target system and for the given input streams.
Not an actual answer, but a word on optimisation. You can increase the speed of the program if you have a bigger buffer. Basically the larger the buffer the faster the program runs HOWEVER the speed you gain from just making it larger will increase logarithmically.
Here is a picture of a graph that will help you understand. Also, what i mentioned applies to any simmilar situation. This includes: Copying files, filling the sound buffer etc. Loading the entire file in your RAM first and operationg on it will usually be faster than loading parts of it. Ofc this is not possible with larger files but still this is what you should aim for if you want speed.
PS: I'm writting here because i don't have rep to comment.
EDIT: I came up with solution but since you did not state what you need to do with your buffer3 and buffer4 i packed it up inside a function.
If you are sure that you are only going to use 16 bytes as a buffer size, remove the nBufferSize parameter and replace the buffer dynamic allocation with a static one.
If after the execution you need the buffers, add them as parameters and keep the nBufferSize param. Keep in mind that if you intend to use them outside the function, you should also allocate them outside the function, so things don't get messy.
/** Returns 0 if files are identical, 1 if they are different and -1 if there
is an error. */
int FileCmp(char* szFile1, char* szFile2, int nBufferSize)
{
FILE *f1, *f2;
f1 = fopen(szFile1, "rb");
f2 = fopen(szFile2, "rb");
// Some error checking?
if (f1 == NULL || f2 == NULL)
return -1;
// You can check here for file sizes before you start comparing them.
// ...
// Start the comparrison.
/// Replace this part with static allocation. --------
char* lpBuffer1 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer1 == NULL) // close the files and return error.
{
fclose(f1);
fclose(f2);
return -1;
}
char* lpBuffer2 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer2 == NULL) // close the files, free buffer1 and return error.
{
free(lpBuffer1);
fclose(f1);
fclose(f2);
return -1;
}
/// --------------------------------------------------
while(1)
{
unsigned int uRead1 = fread(lpBuffer1, sizeof(char), nBufferSize, f1);
unsigned int uRead2 = fread(lpBuffer2, sizeof(char), nBufferSize, f2);
if (uRead1 != uRead2)
goto lFilesAreDifferent;
for(unsigned int i = 0; i < uRead1; i++)
if (lpBuffer1[i] != lpBuffer2[i])
goto lFilesAreDifferent;
if ((feof(f1) != 0) && (feof(f2) != 0))
break; // both files have nothing more to read and are identical.
goto lSkip;
lFilesAreDifferent:
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 1;
lSkip:;
}
// The files are the same. Close them, free the buffers and return 0.
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 0;
}
A simple Demo:
#define BUFFER_SIZE 16
int main(int nArgs, char** szArgs)
{
if (nArgs != 3)
{
printf("Invalid number of arguments.");
return 0;
}
int nResult = FileCmp(szArgs[1], szArgs[2], BUFFER_SIZE);
switch (nResult)
{
case 0: printf("Files [%s] and [%s] are identical.", szArgs[1], szArgs[2]); break;
case 1: printf("Files [%s] and [%s] are different.", szArgs[1], szArgs[2]); break;
case -1: printf("Error."); break;
}
return 0;
}
EDIT II: Personally, i have never used the C standard FILE library (it was either C++ fstream or pure win32 fileapi) so don't take my word here for granted but fread is the fastest function i could find (faster than fgets or fgetc). If you want even faster than this you should get into OS dependant functions (like ReadFile() for Windows).
chqrlie's solution using getc is absolutely the right way to do this. I wanted to address some points brought up in comments, and find it's best to do that with code. In one comment, I recommend pseudo code which could be confusing (namely, you can't write fwrite(file1...) || fwrite(file2 ...) because of the short circuit. But you can implement the idea of that with:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
* Compare two files, 16 bytes at a time. (Purely to demonstrate memcmp.
* Clearly, this should be implemented with getc.)
*/
FILE * xfopen(const char *, const char *);
size_t xfread(void *, FILE *, const char *);
int
main(int argc, char **argv)
{
FILE *fp[2];
size_t n[2];
char buf[2][16];
unsigned count = 0;
if(argc != 3) { return EXIT_FAILURE; }
fp[0] = xfopen(argv[1], "r");
fp[1] = xfopen(argv[2], "r");
do {
n[0] = xfread(buf[0], fp[0], argv[1]);
n[1] = xfread(buf[1], fp[1], argv[2]);
if( n[0] != n[1] || (n[0] && memcmp(buf[0], buf[1], n[0]))) {
fprintf(stderr, "files differ in block %u\n", count);
return 1;
}
count += 1;
} while(n[0]);
puts("files are identical");
return 0;
}
size_t
xfread(void *b, FILE *fp, const char *name)
{
size_t n = fread(b, 1, 16, fp);
if(n == 0 && ferror(fp)) {
fprintf(stderr, "Error reading %s\n", name);
exit(EXIT_FAILURE);
}
return n;
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = strcmp(path, "-") ? fopen(path, mode) : stdin;
if( fp == NULL ) {
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

Write structure to a binary file, read it from file, store in buffer, print the buffer

I am trying to write a structure to a binary file, the punctuation will be received as a parameter in the insertInFile function, (working with raspberryPi, and store the values of the time reaction with switches). Then I want to read from the file and and store the values on the heap and finally read those values I stored dynamically. I'm having problems with this because either it cannot read properly or it cannot write to the binary file.
typedef enum dificulty {
EASY, MEDIUM, HARD
} Dificulty;
typedef struct player {
char nickname[MAXSIZE];
enum dificulty Dificulty;
float pontuacion;
} Player;
#define LED_CYCLES 5
#define MAX_PLAYERS 2
int insertInFile(float const *const timeReceived, unsigned short *level, char *playerName) {
Player players[MAX_PLAYERS];
int count = 0, i;
FILE *fplayers;
//check if file can be opened
if ((fplayers = fopen("game.bin", "wb")) == NULL) {
fputs("Erro ao abrir ficheiro\n", stderror);
return (-1);
}
//go to the beginning of the file
rewind(fplayers);
//cycle that allows to save int the bin file the struct "Player"
for (i = 0; i < count; i++) {
Player[i].pontuacion = &timeReceived[count];
Player[i].Dificulty = &level;
Player[i].nickname = &playerName;
fwrite(&players, sizeof (Player), count, fplayers);
}
//close the bin file
fclose(fplayers);
return 0;
}
void obtainFromFile() {
Player players;
int count = 0;
FILE *fplayers;
size_t size;
unsigned char *buffer;
int i;
//open file
fp = fopen("game.bin", "rb");
fseek(fp, 0, SEEK_END);
size = ftell(fp); // calculate the size needed
fseek(fp, 0, SEEK_SET);
buffer = (unsigned char *)malloc(size);
if (fplayers == NULL) { // check some error if file == empty
printf("Error\n", stderr);
return (-1);
} else
if (fread(&buffer, sizeof (*buffer), size, fp) != size) // if count of read bytes != calculated size of .bin file -> ERROR
printf("Error\n", stderr);
return (-1);
} else {
for (i = 0; i < size; i++) {
printf("%02x", buffer[i]);
}
}
fclose(fp);
free(buffer);
}
In insertInFile() there is a misunderstanding of the structure use. Writing Player.pontuacion is not right.
To assign a value in an array of struct Player
players[MAX_PLAYERS];, use players[i].Dificulty = (Dificulty)level.
//cicle that allows to save int the bin file the struct "Player"
for (i = 0; i < count; i++) {
players[i].pontuacion = timeReceived[count];
players[i].Dificulty = (Dificulty)(level[count]);
strncpy(players[i].nickname,playerName,MAXSIZE-1);
players[i].nickname[MAXSIZE]='\0';
}
// write count players
fwrite(&(players[0]), sizeof (Player), count, fplayers);
Instead of:
for (i = 0; i < count; i++) {
Player.pontuacion = &timeReceived[count];
Player.Dificulty = &level;
Player.nickname = &playerName;
fwrite(&players, sizeof (Player), count, fplayers);
}

split file in c error get buffer in readfile

I program a program to split file in C in Ubuntu.
I have error when get buffer in readfile.
here is my code.
int split(char *filename, unsigned long part) {
FILE *fp;
char *buffer;
size_t result; // bytes read
off_t fileSize;
fp = fopen(filename, "rb");
if (fp == NULL) {
fprintf(stderr, "Cannot Open %s", filename);
exit(2);
}
// Get Size
fileSize = get_file_size(filename);
// Buffer
buffer = (char*) malloc(sizeof(char) * (fileSize + 1));
if (buffer == NULL) {
fputs("Memory error", stderr);
fclose(fp);
return 1;
}
// Copy file into buffer
//char buffers[11];
result = fread(buffer, 1, fileSize, fp);
buffer[fileSize] = '\0';
if (result != fileSize) {
fputs("Reading error", stderr);
return 1;
}
// Split file
off_t partSize = fileSize / part;
// Last Part
off_t lastPartSize = fileSize - partSize * part;
unsigned long i;
unsigned long j;
// create part 1 to n-1
for (j = 0; j < part; j++) {
char partName[255];
char *content;
char partNumber[3];
// Content of file part
// for (i = j; i < partSize * (j + 1); i++) {
//
// }
content = (char*) malloc(sizeof(char) * partSize);
content = copychar(buffer, j + i, partSize + i);
i += partSize;
//copy name
strcpy(partName, filename);
// part Number
sprintf(partNumber, "%d", j);
// file name with .part1 2 3 4 ....
strcat(partName, ".part");
strcat(partName, partNumber);
// Write to file
writeFile(partName, content);
free(content);
}
// last part
char *content;
content = (char*) malloc(sizeof(char) * (fileSize - partSize * (part - 1)));
content = copychar(buffer, (part - 1) * partSize + 1, fileSize);
char lastPartNumber[3];
char lastPartName[255];
sprintf(lastPartNumber, "%d", part);
strcpy(lastPartName, filename);
strcat(lastPartName, ".part");
strcat(lastPartName, lastPartNumber);
writeFile(lastPartName, content);
free(content);
free(buffer);
fclose(fp);
return 0;
}
here is function copychar from start to end
char *copychar(char* buffer, unsigned long start, unsigned long end) {
if (start >= end)
return NULL;
char *result;
result = (char*) malloc(sizeof(char) * (end - start) + 1);
unsigned long i;
for (i = start; i <= end; i++)
result[i] = buffer[i];
result[end] = '\0';
return result;
}
here is function to get filesize
off_t get_file_size(char *filename) {
struct stat st;
if (stat(filename, &st) == 0)
return st.st_size;
fprintf(stderr, "Cannot determine size of %s: %s\n", filename);
return -1;
}
here is function to write file
int writeFile(char* filename, char*buffer) {
if (buffer == NULL || filename == NULL)
return 1;
FILE *file;
file = fopen(filename, "wb");
fwrite(buffer, sizeof(char), sizeof(buffer) + 1, file);
fclose(file);
return 0;
}
When I test I use file test 29MB and it dumped.
I debug It return fileSize true but when readfile in buffer get from file it only return 135 characters and when use copychar it error.
Breakpoint 1, 0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary 1342169369 0 0 100644 4 `\n2.0\ncontrol.tar.gz 1342169369 0 0 100644 4557 `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43
Program received signal SIGSEGV, Segmentation fault.
0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary 1342169369 0 0 100644 4 `\n2.0\ncontrol.tar.gz 1342169369 0 0 100644 4557 `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43
Program terminated with signal SIGSEGV, Segmentation fault.
The program no longer exists.
I don't know how to devide buffer into part to write into part when split.
Thank for advance!
It's highly impractical to copy files in 1 big block as you may have noticed. And it's not needed.
At the simplest level you could copy the file byte by byte, like this
while( ( ch = fgetc(source) ) != EOF ) {
fputc(ch, target);
}
Which will work, but it will be quite slow. Better to copy in blocks, like this:
unsigned char buf[4096];
size_t size;
while( (size = fread(buf, 1, sizeof(buf), fpRead) ) > 0) {
fwrite(buf, 1, size, fpWrite);
}
Notice that the resulting code is way simpler and contains no dynamic memory allocation.
You still need to add the splitting logic of course, but that can be done by tracking the number of bytes written and opening a new write-file before actually writing it.
EDIT: how to handle the multipart facet - schematically, you still need to implement extra checks for some special cases and test results of the different system calls of course
unsigned char buf[4096];
size_t size;
size_t partsize = 100000; // asssuming you want to write 100k parts.
size_t stilltobewritten = partsize; // bytes remaining to be written in current part
size_t chunksize = sizeof(buf); // first time around we read full buffersize
while( (size = fread(buf, 1, chunksize, fpRead) ) > 0) {
fwrite(buf, 1, size, fpWrite);
stilltobewritten -= size; // subtract bytes written from saldo
if (stilltobewritten == 0) {
// part is complete, close this part and open next
fclose(fpWrite);
fpWrite = fopen(nextpart,"wb");
// and reinit variables
stilltobewritten = partsize;
chunksize = sizeof(buf);
} else {
// prep next round on present file - just the special case of the last block
// to handle
chunksize = (stilltobewritten > sizeof(buf)) ? sizeof(buf) : stilltobewritten;
}
}
and EDIT 2: the file part name can be made a LOT simpler as well:
sprintf(partName, "%s.part%d",file, j);
concerning the original code, there's some confusion about start and end in the copychar. First, you probably meant sizeof(char) * (end - start + 1) rather than sizeof(char) * (end - start) + 1 in the malloc, second, you're copying end-start+1 symbols from the original buffer (for (i = start; i <= end; i++)) and then overwrite the last one with '\0', which probably isn't the intended behavior.

Resources