split files into fixed-size blocks in C - c

I am asked to split files into fixed-size blocks to do the file encryption. More specifically, I have lots of files, which could be binary or text.
I am asked to write a user program that takes as input these files, splits each file into say many 32-bit blocks, and then sends out the 32-bit blocks.
The scenario is that the user backups their files in a remote server by splitting their files into blocks, encrypting the blocks (using RSA), and then sending the ciphertexts to the server.
Consider two files, one is a 33-bit text file A and another is a 34-bit binary file B.
A can be partitioned into two 32-bit blocks A1 and A2 (the last 31 bis of A2 are all 0's).
B can be partitioned into two 32-bit blocks B1 and B2 (the last 30 bis of B2 are all 0's).
If I obtain A1 (or A2, B1, B2), then I regard A1 as a 32-bit integer and can do the RSA encryption.
I am able to write code for RSA encryption, but unfortunately I have no idea about how to write the C code for obtaining A1, A2, B1, B2.
Can anyone help me write a sample code or give me some reference?

Below is the high-level algorithm which you can easily convert to C code:
char* get_Next_Block_From_File(FILE *fp, int seek_offset, int blockSize)
{
// Open file
// lseek to seek_offset
len = blockSize / 8;
bit_pos = blockSize % 8;
for (i=0; i<len; i++) {
copy_from_file_to_local_buffer_byte_by_byte();
}
if (bit_pos) {
byte[i] <<= (8 - bit_pos);
append_byte_to_local_buffer();
}
return local_buffer;
}

C Program for splitting the given file into fixed size blocks
// split.cpp : main project file.
#include "stdafx.h"
using namespace System;
#include <stdio.h>
#include <string.h>
#include <conio.h>
#define SEGMENT 600 //approximate target size of small file
long file_size(char *name);//function definition below
int splitFile(char *fp1, size_t maxSize);
long file_size(char *name)
{
FILE *fp = fopen(name, "rb"); //must be binary read to get bytes
long size=-1;
if(fp)
{
fseek (fp, 0, SEEK_END);
size = ftell(fp)+1;
fclose(fp);
}
return size;
}
/*
** splitFile
** Splits an existing input file into multiple output files with a specified
** maximum file size.
**
** Return Value:
** Number of created result files, or 0 in case of bad input data or a negative
** value in case of an error during file splitting.
*/
int splitFile()
{
int result = 0;
char buffer[1024];//change see //2098 * 16
size_t size;
size_t read;
size_t written;
int segments=0, i, len, accum;
//FILE *fp1, *fp2;
char filename[260]={""};
//char filename[260]={"D:\\smallFileName_"};//base name for small files.
char largeFileName[]={"D:\\Payload_data\\Datafile_to_split.txt"};//change to your path
long sizeFile = file_size(largeFileName);
char smallFileName[260];
char line[1024];
long maxSize= sizeFile/SEGMENT + 1;
int filecounter=1;
FILE *fIn;
FILE *fOut;
fIn = fopen(largeFileName, "rb");
if ((fIn != NULL) && (maxSize > 0))
{
// fIn = fopen(fIn, "rb");
if (fIn != NULL)
{
fOut = NULL;
result = 1; /* we have at least one part */
// Splitting Data from pchar into multiple files
for(i=1; i<25 ;i++)
{
sprintf(smallFileName, "%s%d.txt", filename, i);
printf("\n File number %d",i);
while (!feof(fIn))
{
/* initialize (next) output file if no output file opened */
if (fOut == NULL)
{
filecounter++;
sprintf(buffer, "smallFileName_%d", filecounter);
fOut = fopen(buffer, "wb");
if (fOut == NULL)
{
result *= -1;
break;
}
size = 0;
}
/* calculate size of data to be read from input file in order to not exceed maxSize */
read = sizeof(buffer);
if ((size + read) > maxSize)
{
read = maxSize - size;
}
/* read data from input file */
read = fread(buffer, 1, read, fIn);
if (read == 0)
{
result *= -1;
break;
}
/* write data to output file */
written = fwrite(buffer, 1, read, fOut);
if (written != read)
{
result *= -1;
break;
}
/* update size counter of current output file */
size += written;
if (size >= maxSize) /* next split? */
{
fclose(fOut);
fOut = NULL;
result++;
}
}
/* clean up */
if (fOut != NULL)
{
fclose(fOut);
}
fclose(fIn);
}
}
}
return (result);
}
int main(void)
{
//segments = sizeFile/SEGMENT + 1;//ensure end of file
//fp1 = fopen(largeFileName, "r");
////if(fp1)
////{
// for(i=0;i<segments;i++)
// {
// accum = 0;
// sprintf(smallFileName, "%s%d.txt", filename, i);
// fp2 = fopen(smallFileName, "w");
// if(fp2)
// {
// while(fgets(line, 1080, fp1) && accum <= SEGMENT)
// {
// accum += strlen(line);//track size of growing file
// fputs(line, fp2);
// }
// fclose(fp2);
// }
// }
// fclose(fp1);
//}
//maxSize =sizeFile/SEGMENT + 1;
//ensure end of file
splitFile();
getch();
printf("\n File splitted Successfully");
return 0;
}

Related

Memory problems with a course exercise in C

I'm following a course about programming and went smooth so far. Now I'm stuck into this exercise which asks us to reverse a WAV file in C.
I have two questions about it:
It seems to work and correctly reverse few seconds audios but on longer ones it doesn't anymore. Why?
I previously added a get_data_size function to know a value to use later in the program, but after discovering the fseek() function it wasn't needed anymore. So I wanted to delete it, but if I do I get the "segmentation fault" error, while if I keep it it just runs ok. Why?
EDIT: Ok, now MAGICALLY, I tried to delete the get_data size function again and compiled it... and it doesn't go to segfault anymore. without changing anything. It still doesn't reverse the longer files though.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "wav.h"
int check_format(WAVHEADER header);
int get_block_size(WAVHEADER header);
long get_data_size(FILE *input, int size);
int main(int argc, char *argv[])
{
// Ensure proper usage
if (argc != 3)
{
printf("insert input and output names");
return 2;
}
// Open input file for reading
char *infile = argv[1];
char *outfile = argv[2];
FILE *input = fopen(infile, "r");
if (input == NULL)
{
printf("could not open the file\n");
return 1;
}
// Read header into an array
WAVHEADER inwav;
fread(&inwav, sizeof(WAVHEADER), 1, input);
// Use check_format to ensure WAV format
if (check_format(inwav) != 0)
{
fclose(input);
printf("not a valid format");
return 3;
}
// Open output file for writing
FILE *output = fopen(outfile, "w");
if (output == NULL)
{
printf("could not open the file\n");
return 1;
}
// Write header to file
fwrite(&inwav, sizeof(WAVHEADER), 1, output);
// Use get_block_size to calculate size of block
int n = get_block_size(inwav);
//not needed anymore
long s = get_data_size(input, n);
//Sets it to the end of the file
fseek(input, n, SEEK_END);
// Write audio to file from the last block to the first
while (ftell(input) != sizeof(WAVHEADER))
{
WORD temp;
fread(&temp, n, 1, input);
fwrite(&temp, n , 1, output);
fseek(input, -2 * n, SEEK_CUR);
}
fclose(input);
fclose(output);
return 0;
}
int check_format(WAVHEADER header)
{
// Checks for the WAVE spelt in the header
if (header.format[0] != 'W' && header.format[1] != 'A' && header.format[2] != 'V' && header.format[3] != 'E')
{
return 1;
}
return 0;
}
int get_block_size(WAVHEADER header)
{
//Double checks the channels are either 1 or 2 and calculates the BYTES per audio block
if (header.numChannels == 1 || header.numChannels ==2)
{
int size = (header.bitsPerSample / 8) * header.numChannels;
return size;
}
else
{
printf("not supported\n");
return 0;
}
return 0;
}
//not needed anymore
long get_data_size(FILE *input, int size)
{
long data = 0;
int temp;
while (!feof(input))
{
fread (&temp, 1, 1, input);
data++;
}
return data;
}
```
#include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t WORD;
typedef uint32_t DWORD;
typedef struct
{
BYTE chunkID[4];
DWORD chunkSize;
BYTE format[4];
BYTE subchunk1ID[4];
DWORD subchunk1Size;
WORD audioFormat;
WORD numChannels;
DWORD sampleRate;
DWORD byteRate;
WORD blockAlign;
WORD bitsPerSample;
BYTE subchunk2ID[4];
DWORD subchunk2Size;
} __attribute__((__packed__))
WAVHEADER;

I cannot figure out why fread will not populate my buffer properly. CS50 recovery

I've torn through this code in gdb for hours. I know that fread is returning the appropriate amount of bytes (512). Even checked the $eax print to confirm. Is there any chance someone may be able to give me a hint at what's wrong with my logic?
I thought that the headers may be offset from the beginning of the file, so I figured on first read going through byte by byte looking for the hex match and setting fseek would do the trick. No such luck. Printing match results in a 0.
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
typedef uint8_t BYTE;
typedef enum { false, true } boolean;
int main(int argc, char *argv[])
{
// get filenames from cml input and open file
char *infile = argv[optind];
char *fileName = "image";
FILE *rawData = fopen(infile, "r");
FILE *imgJPG = fopen(fileName, "w");
int match = 0;
int imgCnt = 0;
// buffer to hold 512 bytes of file data - FAT file system
BYTE *FATbuffer = (BYTE *)malloc(sizeof(BYTE) * 512);
if (rawData == NULL)
{
printf("Error processing file. Exiting...");
return 1;
}
// begin reading raw data and writing it to buffer
while (fread(FATbuffer, sizeof(BYTE), 512, rawData) == 512)
{
if (imgCnt == 0)
{
for (int c = 0; c < 512; c++)
{
if (FATbuffer[c + 0] == 0xff &&
FATbuffer[c + 1] == 0x8d &&
FATbuffer[c + 2] == 0xff)
{
fseek(rawData, c, SEEK_SET);
imgCnt++;
match++;
}
}
}
else
{
if (FATbuffer[0] == 0xff &&
FATbuffer[1] == 0x8d &&
FATbuffer[2] == 0xff &&
imgCnt > 0)
{
sprintf(fileName, "%d.jpg", imgCnt);
fclose(imgJPG);
imgCnt++;
}
if (imgJPG == NULL)
{
printf("Error processing file. Exiting...");
return 3;
}
fwrite(FATbuffer, sizeof(BYTE), 512, imgJPG);
}
}
printf("%d\n", match);
// file processed, free memory
free(FATbuffer);
return 0;
}
There are multiple problems in your code:
you should test if a command line argument is available (after handling options, code you probably removed for posting).
files must be open in binary mode to avoid potential end of translation.
you should delay opening imgJPG until you have found the JPG header.
there is no need to allocate FATbuffer, defining a 512 byte array with automatic storage is fine.
you scan for the JPG signature one block at a time, but you might miss the signature if it spans a 512 byte boundary and you access 2 bytes beyond the end of the FATbuffer array when c is greater than 509.
fseek(rawData, c, SEEK_SET); set the file position at the offset from the beginning of the array, not from the beginning of the file.
sprintf(fileName, "%d.jpg", imgCnt); attempts to overwrite a string constant. This has undefined behavior. You probably meant this:
char fileName[64];
snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
FILE *imgJPG = fopen(fileName, "wb");
Here is a modified version that can extract JPG files embedded anywhere in a data stream:
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
// get filenames from cml input and open file
if (optind >= argc)
return 1;
char *infile = argv[optind];
FILE *rawData = fopen(infile, "rb"); // open the disk image
char fileName[64];
FILE *imgJPG = NULL;
int imgCnt = 0;
// buffer to hold 512 bytes of file data - FAT file system
// add an extra 2 bytes to match signature across sector boundaries
uint8_t FATbuffer[514];
// begin reading raw data into buffer
int pos = 2;
while (fread(FATbuffer + 2, 1, 512, rawData) == 512) {
for (int c = pos; c < 512; c++) {
if (FATbuffer[c + 0] == 0xff &&
FATbuffer[c + 1] == 0x8d &&
FATbuffer[c + 2] == 0xff) {
// found signature: skip to a new file
if (imgJPG) {
// write the end of the current image
fwrite(FATbuffer + pos, c - pos, 1, imgJPG);
fclose(imgJPG);
}
pos = c;
imgCnt++;
snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
imgJPG = fopen(fileName, "wb");
if (imgJPG == NULL) {
fprintf(stderr, "Cannot create file %s: %s\n",
fileName, strerror(errno));
return 3;
}
}
}
if (imgJPG) {
// write end of block to current image
fwrite(FATbuffer + pos, 512 - pos, 1, imgJPG);
}
// copy the last 2 bytes to test for signature overlapping blocks
FATbuffer[0] = FATbuffer[512];
FATbuffer[1] = FATbuffer[513];
// uncopied file data starts a 0 now.
pos = 0;
}
if (imgJPG) {
// write last 2 bytes to current image
fwrite(FATbuffer, 2, 1, imgJPG);
fclose(imgJPG);
}
printf("%d\n", imgCnt != 0);
printf("%d images extracted\n", imgCnt);
return 0;
}
If you can assume the signature to be at the start of a sector, the code can be simplified:
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
// get filenames from cml input and open file
if (optind >= argc)
return 1;
char *infile = argv[optind];
FILE *rawData = fopen(infile, "rb"); // open the disk image
char fileName[64];
FILE *imgJPG = NULL;
int imgCnt = 0;
// buffer to hold 512 bytes of file data - FAT file system
uint8_t FATbuffer[512];
// begin reading raw data into buffer
while (fread(FATbuffer, 1, 512, rawData) == 512) {
if (FATbuffer[c + 0] == 0xff &&
FATbuffer[c + 1] == 0x8d &&
FATbuffer[c + 2] == 0xff) {
// found signature: skip to a new file
if (imgJPG) {
fclose(imgJPG);
}
imgCnt++;
snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
imgJPG = fopen(fileName, "wb");
if (imgJPG == NULL) {
fprintf(stderr, "Cannot create file %s: %s\n",
fileName, strerror(errno));
return 3;
}
}
if (imgJPG) {
// write end of block to current image
fwrite(FATbuffer, 512, 1, imgJPG);
}
}
if (imgJPG) {
fclose(imgJPG);
}
printf("%d\n", imgCnt != 0);
printf("%d images extracted\n", imgCnt);
return 0;
}

Compare two binary files in C

I am writing a program to compare two binary files and plot the first difference. I want to read 16 bytes of data from each file continuously and compare them. For that I am storing 16 bytes from both file into char *buffer1, buffer2. When I print the output I am getting that buffer1 has both the data of file1 and file2.
The code is as follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void printConversion(char *buf1, char *buf2) {
size_t len = strlen(buf1);
char *binary = malloc(len * 8 + 1);
binary[0] = '\0';
for (size_t i = 0; i < len; ++i) {
char ch = buf1[i];
for (int j = 7; j >= 0; --j) {
if (ch & (1 << j)) {
strcat(binary,"1");
} else {
strcat(binary,"0");
}
}
}
printf("File1: %s\t", binary);
free(binary);
printf("File2:");
for (int i = 0; i < sizeof(buf2); i++) {
printf("%x", buf2[i] - '0');
}
}
void fileRead(FILE *fp, char *buf, int count) {
fseek(fp, count, SEEK_SET);
fread(buf, 1, 16, fp);
}
int fileSize(FILE *fp) {
fseek(fp, 0, SEEK_END);
int size = ftell(fp) + 1;
return size;
}
int main(int argc, char *argv[]) {
printf("***Binary File Comparator***\n ");
int count = 0;
int index = 0;
char buffer1[16];
char buffer2[16];
char buffer3[16];
char buffer4[16];
// Invalid Number of Arguments
if (argc < 3 || argc > 3) {
printf("Invalid Number of Arguments\n");
}
FILE *fp1, *fp2;
fp1 = fopen(argv[1], "rb");
int size = fileSize(fp1);
int size1 = size;
fclose(fp1);
while (size > 1) {
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer1, count);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer2, count);
if (size1 < count) {
int lastSize = count - size1;
count = count + lastSize;
fclose(fp2);
} else {
count = count+16;
fclose(fp2);
}
**printf("buffer1:%s\tbuffer2:%s\n", buffer1, buffer2)**;
size = size - 16;
int result = strcmp(buffer1, buffer2);
if (result != 0) {
for (int i = 0; i < sizeof(buffer1); i++) {
if (buffer1[i] != buffer2[i]) {
int count1 = (count - 16) + i;
index++;
if (index == 1) {
printf("Byte_Offset:%x\n", count1);
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer3, count1);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer4, count1);
fclose(fp2);
printConversion(buffer3, buffer4);
break;
}
} else {
continue;
}
}
}
}
}
I have tried to highlight the printf part that is printing my buffer1 and buffer2
The output is as follows:
buffer1:83867715933586928386771593358692 buffer2:8386771593358692
buffer1:49216227905963264921622790596326 buffer2:4921622790596326
buffer1:40267236116867294026723611686729 buffer2:4026723611686729
buffer1:82306223673529228230622367352922 buffer2:8230622367352922
buffer1:25869679356114222586967935611422 buffer2:2586967935611422
Can anybody help what I am doing wrong. Please point me the error and what optimization changes could be done in code. I am at learning stage your feedback will be very helpful.
You are complicating the task by reading 16 bytes at a time. If the goal is to indicate the first difference, just read one byte at a time from both files with getc() this way:
int compare_files(FILE *fp1, FILE *fp2) {
unsigned long pos;
int c1, c2;
for (pos = 0;; pos++) {
c1 = getc(fp1);
c2 = getc(fp2);
if (c1 != c2 || c1 == EOF)
break;
}
if (c1 == c2) {
printf("files are identical and have %lu bytes\n", pos);
return 0; // files are identical
} else
if (c1 == EOF) {
printf("file1 is included in file2, the first %lu bytes are identical\n", pos);
return 1;
} else
if (c2 == EOF) {
printf("file2 is included in file1, the first %lu bytes are identical\n", pos);
return 2;
} else {
printf("file1 and file2 differ at position %lu: 0x%02X <> 0x%02X\n", pos, c1, c2);
return 3;
}
}
In terms of efficiency, reading one byte at a time does not pose a problem if the streams are buffered. For large files, you can get better performance by memory mapping the file contents if available on the target system and for the given input streams.
Not an actual answer, but a word on optimisation. You can increase the speed of the program if you have a bigger buffer. Basically the larger the buffer the faster the program runs HOWEVER the speed you gain from just making it larger will increase logarithmically.
Here is a picture of a graph that will help you understand. Also, what i mentioned applies to any simmilar situation. This includes: Copying files, filling the sound buffer etc. Loading the entire file in your RAM first and operationg on it will usually be faster than loading parts of it. Ofc this is not possible with larger files but still this is what you should aim for if you want speed.
PS: I'm writting here because i don't have rep to comment.
EDIT: I came up with solution but since you did not state what you need to do with your buffer3 and buffer4 i packed it up inside a function.
If you are sure that you are only going to use 16 bytes as a buffer size, remove the nBufferSize parameter and replace the buffer dynamic allocation with a static one.
If after the execution you need the buffers, add them as parameters and keep the nBufferSize param. Keep in mind that if you intend to use them outside the function, you should also allocate them outside the function, so things don't get messy.
/** Returns 0 if files are identical, 1 if they are different and -1 if there
is an error. */
int FileCmp(char* szFile1, char* szFile2, int nBufferSize)
{
FILE *f1, *f2;
f1 = fopen(szFile1, "rb");
f2 = fopen(szFile2, "rb");
// Some error checking?
if (f1 == NULL || f2 == NULL)
return -1;
// You can check here for file sizes before you start comparing them.
// ...
// Start the comparrison.
/// Replace this part with static allocation. --------
char* lpBuffer1 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer1 == NULL) // close the files and return error.
{
fclose(f1);
fclose(f2);
return -1;
}
char* lpBuffer2 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer2 == NULL) // close the files, free buffer1 and return error.
{
free(lpBuffer1);
fclose(f1);
fclose(f2);
return -1;
}
/// --------------------------------------------------
while(1)
{
unsigned int uRead1 = fread(lpBuffer1, sizeof(char), nBufferSize, f1);
unsigned int uRead2 = fread(lpBuffer2, sizeof(char), nBufferSize, f2);
if (uRead1 != uRead2)
goto lFilesAreDifferent;
for(unsigned int i = 0; i < uRead1; i++)
if (lpBuffer1[i] != lpBuffer2[i])
goto lFilesAreDifferent;
if ((feof(f1) != 0) && (feof(f2) != 0))
break; // both files have nothing more to read and are identical.
goto lSkip;
lFilesAreDifferent:
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 1;
lSkip:;
}
// The files are the same. Close them, free the buffers and return 0.
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 0;
}
A simple Demo:
#define BUFFER_SIZE 16
int main(int nArgs, char** szArgs)
{
if (nArgs != 3)
{
printf("Invalid number of arguments.");
return 0;
}
int nResult = FileCmp(szArgs[1], szArgs[2], BUFFER_SIZE);
switch (nResult)
{
case 0: printf("Files [%s] and [%s] are identical.", szArgs[1], szArgs[2]); break;
case 1: printf("Files [%s] and [%s] are different.", szArgs[1], szArgs[2]); break;
case -1: printf("Error."); break;
}
return 0;
}
EDIT II: Personally, i have never used the C standard FILE library (it was either C++ fstream or pure win32 fileapi) so don't take my word here for granted but fread is the fastest function i could find (faster than fgets or fgetc). If you want even faster than this you should get into OS dependant functions (like ReadFile() for Windows).
chqrlie's solution using getc is absolutely the right way to do this. I wanted to address some points brought up in comments, and find it's best to do that with code. In one comment, I recommend pseudo code which could be confusing (namely, you can't write fwrite(file1...) || fwrite(file2 ...) because of the short circuit. But you can implement the idea of that with:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
* Compare two files, 16 bytes at a time. (Purely to demonstrate memcmp.
* Clearly, this should be implemented with getc.)
*/
FILE * xfopen(const char *, const char *);
size_t xfread(void *, FILE *, const char *);
int
main(int argc, char **argv)
{
FILE *fp[2];
size_t n[2];
char buf[2][16];
unsigned count = 0;
if(argc != 3) { return EXIT_FAILURE; }
fp[0] = xfopen(argv[1], "r");
fp[1] = xfopen(argv[2], "r");
do {
n[0] = xfread(buf[0], fp[0], argv[1]);
n[1] = xfread(buf[1], fp[1], argv[2]);
if( n[0] != n[1] || (n[0] && memcmp(buf[0], buf[1], n[0]))) {
fprintf(stderr, "files differ in block %u\n", count);
return 1;
}
count += 1;
} while(n[0]);
puts("files are identical");
return 0;
}
size_t
xfread(void *b, FILE *fp, const char *name)
{
size_t n = fread(b, 1, 16, fp);
if(n == 0 && ferror(fp)) {
fprintf(stderr, "Error reading %s\n", name);
exit(EXIT_FAILURE);
}
return n;
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = strcmp(path, "-") ? fopen(path, mode) : stdin;
if( fp == NULL ) {
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

Fast double file read in C

I have a large file containing floating point numbers and I want to read them.
52.881 49.779 21.641 37.230 23.417 7.506 120.190 1.240 79.167 82.397 126.502 47.377 112.583 124.590 103.339 5.821 24.566 38.916 42.576
This is just the beggining of the file. It has 10000000 numbers.
I got this code but I don't know how to print the numbers.
#include <stdio.h>
#include <stdlib.h>
#include <err.h>
#include <fcntl.h>
#include <sysexits.h>
#include <unistd.h>
int main()
{
int fd;
size_t bytes_read, bytes_expected = 1000000*sizeof(double);
double *data;
char *infile = "file.dat";
if ((fd = open(infile,O_RDONLY)) < 0)
err(EX_NOINPUT, "%s", infile);
if ((data = malloc(bytes_expected)) == NULL)
err(EX_OSERR, "data malloc");
bytes_read = read(fd, data, bytes_expected);
if (bytes_read != bytes_expected)
err(EX_DATAERR, "Read only %d of %d bytes",
bytes_read, bytes_expected);
/* print all */
free(data);
exit(EX_OK);
}
You are attempting to read a text file as if the data was binary, so you will read some bytes but the double values stored in the array will not be the values that you wanted to read from the file, you can probably do this
FILE *file;
double *array;
size_t count;
const char *infile = "file.dat";
file = fopen(infile, "r");
if (file == NULL)
return -1;
count = 0;
while (fscanf(file, "%*lf") == 1)
count += 1;
rewind(file);
array = malloc(count * sizeof(*array));
if (array == NULL) {
fprintf(stderr, "cannot allocate %zu bytes!\n", count * sizeof(*array));
fclose(file);
return -1;
}
// Read the values into the array
for (size_t i = 0; i < count; ++i) {
fscanf(file, "%lf", &array[i]);
}
// Print the array
for (size_t i = 0; i < count; ++i) {
fprintf(stdout, "%f\n", array[i]);
}
// Release memory
free(array);
Since you want a fast solution, maybe you have to sacrifice memory.
The faster manner of reading a file is in binary form.
Thus, I would obtain the file size with an efficient method,
then I would allocate memory accordingly,
with the idea of uploading the entire file to memory.
There, since memory reading is faster than file reading,
the data can be quickly read by using sscanf(...).
We can also observe that each floating point number
needs at least 3 characters to be stored in a text file:
1 char for the dot ('.'),
1 char for some digit,
and 1 char for
a space (' ') used to separating a value from its succesor in the
file.
Thus, the file size divided by 3 will be the upper bound for the size of the array of doubles.
#include <stdio.h>
int main(void) {
char *filename = "file.dat";
FILE *F = fopen(filename, "rb");
fseek(F, 0L, SEEK_END);
long int filesize = ftell(F);
rewind(F);
char *data = malloc(filesize+1);
fread(data, filesize, 1, F);
data[filesize] = '\0'; // End of string, just in case
fclose(F);
// The desired data will be stored in array:
double *array = malloc(sizeof(double) * filesize/3);
int ret;
int n; // represents the no chars in a sscanf(...) reading
double *a = array;
while (1) { // Infinite loop...
ret = sscanf(data, " %lg%n", a, &n);
if (ret == EOF) break; // <<---- EXIT POINT of the loop
a++;
data += n;
}
long int array_size = a - array + 1;
}

How to read two text files into two dynamically allocated arrays and compare them byte by byte in c?

I'd like to write a program that compares two text files and writes every byte in file two that is different from file one into a third file. The catch is that the text files must be read into dynamically allocated arrays. The arrays are compared byte by byte and any byte in file two's array that is different than file one's array will be put into a third array. Then copy that array into a new text file. How would this be accomplished?
Basically how would the following code get the same result using dynamically allocated arrays?
#include <stdio.h>
int main(int argc, char *argv[])
{
int offset;
int ch1, ch2;
FILE *fh1, *fh2, *fh3=stdout;
if( argc<3 ) {
printf("need two file names\n"); return(1);
}
if(!(fh1 = fopen(argv[1], "r"))) {
printf("cannot open %s\n",argv[1]); return(2);
}
if(!(fh2 = fopen(argv[2], "r"))) {
printf("cannot open %s\n",argv[2]); return(3);
}
if(argc>3) {
if(!(fh3 = fopen(argv[3], "w+"))) {
printf("cannot open %s\n",argv[3]); return(4);
}
}
for(offset = 0; (!feof(fh1)) && (!feof(fh2)); offset++)
{
ch1=ch2='-';
if(!feof(fh1)) ch1 = getc(fh1);
if(!feof(fh2)) ch2 = getc(fh2);
if(ch1 != ch2)
fprintf(fh3,"%d:%c %c\n", offset, ch1, ch2);
else
fprintf(fh3,"%c\n", ch1);
}
return 0;
}
Trivial approach might be using fseek() + ftell() to determine size of file and allocate space then. Another way can be
size_t pos = 0;
size_t allocated = 1024; /* arbitrary value */
void *tmp = malloc(allocated);
BUG_ON(!tmp)
while (!feof(f)) {
size_t l = fread(tmp + pos, 1, allocated - pos, f);
pos += l;
allocated += l + 1024; /* arbitrary value */
tmp = realloc(tmp, allocated);
BUG_ON(!tmp);
}
tmp = realloc(tmp, pos); /* free unneeded space */
When you are under Linux/POSIX, you can mmap the file and access memory directly.

Resources