Compare two binary files in C - c

I am writing a program to compare two binary files and plot the first difference. I want to read 16 bytes of data from each file continuously and compare them. For that I am storing 16 bytes from both file into char *buffer1, buffer2. When I print the output I am getting that buffer1 has both the data of file1 and file2.
The code is as follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void printConversion(char *buf1, char *buf2) {
size_t len = strlen(buf1);
char *binary = malloc(len * 8 + 1);
binary[0] = '\0';
for (size_t i = 0; i < len; ++i) {
char ch = buf1[i];
for (int j = 7; j >= 0; --j) {
if (ch & (1 << j)) {
strcat(binary,"1");
} else {
strcat(binary,"0");
}
}
}
printf("File1: %s\t", binary);
free(binary);
printf("File2:");
for (int i = 0; i < sizeof(buf2); i++) {
printf("%x", buf2[i] - '0');
}
}
void fileRead(FILE *fp, char *buf, int count) {
fseek(fp, count, SEEK_SET);
fread(buf, 1, 16, fp);
}
int fileSize(FILE *fp) {
fseek(fp, 0, SEEK_END);
int size = ftell(fp) + 1;
return size;
}
int main(int argc, char *argv[]) {
printf("***Binary File Comparator***\n ");
int count = 0;
int index = 0;
char buffer1[16];
char buffer2[16];
char buffer3[16];
char buffer4[16];
// Invalid Number of Arguments
if (argc < 3 || argc > 3) {
printf("Invalid Number of Arguments\n");
}
FILE *fp1, *fp2;
fp1 = fopen(argv[1], "rb");
int size = fileSize(fp1);
int size1 = size;
fclose(fp1);
while (size > 1) {
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer1, count);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer2, count);
if (size1 < count) {
int lastSize = count - size1;
count = count + lastSize;
fclose(fp2);
} else {
count = count+16;
fclose(fp2);
}
**printf("buffer1:%s\tbuffer2:%s\n", buffer1, buffer2)**;
size = size - 16;
int result = strcmp(buffer1, buffer2);
if (result != 0) {
for (int i = 0; i < sizeof(buffer1); i++) {
if (buffer1[i] != buffer2[i]) {
int count1 = (count - 16) + i;
index++;
if (index == 1) {
printf("Byte_Offset:%x\n", count1);
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer3, count1);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer4, count1);
fclose(fp2);
printConversion(buffer3, buffer4);
break;
}
} else {
continue;
}
}
}
}
}
I have tried to highlight the printf part that is printing my buffer1 and buffer2
The output is as follows:
buffer1:83867715933586928386771593358692 buffer2:8386771593358692
buffer1:49216227905963264921622790596326 buffer2:4921622790596326
buffer1:40267236116867294026723611686729 buffer2:4026723611686729
buffer1:82306223673529228230622367352922 buffer2:8230622367352922
buffer1:25869679356114222586967935611422 buffer2:2586967935611422
Can anybody help what I am doing wrong. Please point me the error and what optimization changes could be done in code. I am at learning stage your feedback will be very helpful.

You are complicating the task by reading 16 bytes at a time. If the goal is to indicate the first difference, just read one byte at a time from both files with getc() this way:
int compare_files(FILE *fp1, FILE *fp2) {
unsigned long pos;
int c1, c2;
for (pos = 0;; pos++) {
c1 = getc(fp1);
c2 = getc(fp2);
if (c1 != c2 || c1 == EOF)
break;
}
if (c1 == c2) {
printf("files are identical and have %lu bytes\n", pos);
return 0; // files are identical
} else
if (c1 == EOF) {
printf("file1 is included in file2, the first %lu bytes are identical\n", pos);
return 1;
} else
if (c2 == EOF) {
printf("file2 is included in file1, the first %lu bytes are identical\n", pos);
return 2;
} else {
printf("file1 and file2 differ at position %lu: 0x%02X <> 0x%02X\n", pos, c1, c2);
return 3;
}
}
In terms of efficiency, reading one byte at a time does not pose a problem if the streams are buffered. For large files, you can get better performance by memory mapping the file contents if available on the target system and for the given input streams.

Not an actual answer, but a word on optimisation. You can increase the speed of the program if you have a bigger buffer. Basically the larger the buffer the faster the program runs HOWEVER the speed you gain from just making it larger will increase logarithmically.
Here is a picture of a graph that will help you understand. Also, what i mentioned applies to any simmilar situation. This includes: Copying files, filling the sound buffer etc. Loading the entire file in your RAM first and operationg on it will usually be faster than loading parts of it. Ofc this is not possible with larger files but still this is what you should aim for if you want speed.
PS: I'm writting here because i don't have rep to comment.
EDIT: I came up with solution but since you did not state what you need to do with your buffer3 and buffer4 i packed it up inside a function.
If you are sure that you are only going to use 16 bytes as a buffer size, remove the nBufferSize parameter and replace the buffer dynamic allocation with a static one.
If after the execution you need the buffers, add them as parameters and keep the nBufferSize param. Keep in mind that if you intend to use them outside the function, you should also allocate them outside the function, so things don't get messy.
/** Returns 0 if files are identical, 1 if they are different and -1 if there
is an error. */
int FileCmp(char* szFile1, char* szFile2, int nBufferSize)
{
FILE *f1, *f2;
f1 = fopen(szFile1, "rb");
f2 = fopen(szFile2, "rb");
// Some error checking?
if (f1 == NULL || f2 == NULL)
return -1;
// You can check here for file sizes before you start comparing them.
// ...
// Start the comparrison.
/// Replace this part with static allocation. --------
char* lpBuffer1 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer1 == NULL) // close the files and return error.
{
fclose(f1);
fclose(f2);
return -1;
}
char* lpBuffer2 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer2 == NULL) // close the files, free buffer1 and return error.
{
free(lpBuffer1);
fclose(f1);
fclose(f2);
return -1;
}
/// --------------------------------------------------
while(1)
{
unsigned int uRead1 = fread(lpBuffer1, sizeof(char), nBufferSize, f1);
unsigned int uRead2 = fread(lpBuffer2, sizeof(char), nBufferSize, f2);
if (uRead1 != uRead2)
goto lFilesAreDifferent;
for(unsigned int i = 0; i < uRead1; i++)
if (lpBuffer1[i] != lpBuffer2[i])
goto lFilesAreDifferent;
if ((feof(f1) != 0) && (feof(f2) != 0))
break; // both files have nothing more to read and are identical.
goto lSkip;
lFilesAreDifferent:
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 1;
lSkip:;
}
// The files are the same. Close them, free the buffers and return 0.
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 0;
}
A simple Demo:
#define BUFFER_SIZE 16
int main(int nArgs, char** szArgs)
{
if (nArgs != 3)
{
printf("Invalid number of arguments.");
return 0;
}
int nResult = FileCmp(szArgs[1], szArgs[2], BUFFER_SIZE);
switch (nResult)
{
case 0: printf("Files [%s] and [%s] are identical.", szArgs[1], szArgs[2]); break;
case 1: printf("Files [%s] and [%s] are different.", szArgs[1], szArgs[2]); break;
case -1: printf("Error."); break;
}
return 0;
}
EDIT II: Personally, i have never used the C standard FILE library (it was either C++ fstream or pure win32 fileapi) so don't take my word here for granted but fread is the fastest function i could find (faster than fgets or fgetc). If you want even faster than this you should get into OS dependant functions (like ReadFile() for Windows).

chqrlie's solution using getc is absolutely the right way to do this. I wanted to address some points brought up in comments, and find it's best to do that with code. In one comment, I recommend pseudo code which could be confusing (namely, you can't write fwrite(file1...) || fwrite(file2 ...) because of the short circuit. But you can implement the idea of that with:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
* Compare two files, 16 bytes at a time. (Purely to demonstrate memcmp.
* Clearly, this should be implemented with getc.)
*/
FILE * xfopen(const char *, const char *);
size_t xfread(void *, FILE *, const char *);
int
main(int argc, char **argv)
{
FILE *fp[2];
size_t n[2];
char buf[2][16];
unsigned count = 0;
if(argc != 3) { return EXIT_FAILURE; }
fp[0] = xfopen(argv[1], "r");
fp[1] = xfopen(argv[2], "r");
do {
n[0] = xfread(buf[0], fp[0], argv[1]);
n[1] = xfread(buf[1], fp[1], argv[2]);
if( n[0] != n[1] || (n[0] && memcmp(buf[0], buf[1], n[0]))) {
fprintf(stderr, "files differ in block %u\n", count);
return 1;
}
count += 1;
} while(n[0]);
puts("files are identical");
return 0;
}
size_t
xfread(void *b, FILE *fp, const char *name)
{
size_t n = fread(b, 1, 16, fp);
if(n == 0 && ferror(fp)) {
fprintf(stderr, "Error reading %s\n", name);
exit(EXIT_FAILURE);
}
return n;
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = strcmp(path, "-") ? fopen(path, mode) : stdin;
if( fp == NULL ) {
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

Related

Print the user input strings into a least sized user input files

Program should read list of filenames, open these files and put their handles in the array of structure, then read strings and print consecutive lines of strings to smallest files by using handles contained in array of structures.
My program puts data from all lines to only one file which is initially the smallest which is false because it should the one which is smallest with every time it prints data into the file. This is my program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
struct file_t
{
FILE* f;
int size;
}t[5];
void close_file(struct file_t* f) {
if (f == NULL || f->f == NULL) {
}
else {
fclose(f->f);
}
}
int open_file(struct file_t* f, const char* filename) {
if (f == NULL || filename == NULL) {
return 1;
}
FILE* fp;
fp = fopen(filename, "ab");
if (fp == NULL) {
return 2;
}
long int res = ftell(fp);
fclose(fp);
f->size = res;
f->f = fopen(filename, "ab+");
if (fp == NULL) {
return 2;
}
return 0;
}
struct file_t* find_min(const struct file_t* files, int size) {
if (files == NULL || size <= 0) {
return NULL;
}
int x = (files + 0)->size, i = 0, index = 0;
for (i = 0; i < size; i++) {
if ((files + i)->size <= x) {
x = (files + i)->size;
index = i;
}
}
return (struct file_t*)(files + index);
}
int main() {
puts("Input files' names:");
char tab[100];
int num = 0;
while(1==1){
if(fgets(tab, 100, stdin)==NULL||*tab=='\n'){
if (num == 0) {
printf("Couldn't open file");
return 4;
}
break;
}
int index=strlen(tab);
*(tab+index-1)='\x0';
if (strlen(tab) > 30) {
*(tab + 30) = '\x0';
}
if (open_file((t + num), tab) > 0) {
}
else {
num++;
}
}
if (num == 0) {
printf("Couldn't open file");
return 4;
}
char str[1000];
printf("Input text:");
*str = '\x0';
while (fgets(str, 1000, stdin)==NULL||*str!='\n') {
int index=strlen(str);
*(str+index-1)='\x0';
struct file_t* p = find_min(t, num);
fwrite(str, sizeof(char), strlen(str), p->f);
}
for (int i = 0; i < num; i++) {
close_file(t + i);
}
printf("File saved");
return 0;
}
There are some critical bugs that you need to resolve.
fseek(stdin, 0, SEEK_END) -- fseek normally only work on a disk file, or something reasonably similar. Please refer to this link Using fseek with a file pointer that points to stdin
As a matter of fact even fflush() won't work. fflush is something that is designed for flushing output streams, and its behavior with input streams is implementation-dependent. Please refer to this link for more details stdinflush
scanf("%[^\n]s", tab)
If you are using this in a loop or multiple times, only the first read will succeed. The reason being, the \n character is left out from the previous input, and as said earlier fflush() might not be successful in removing that \n. The further calls to scanf() will simply return without reading anything.
'\0x' If you are intending to use this as string terminator then this is not it. It is a multi-character constant with an integer value 120. Below is a vague test run
Code
#include <stdio.h>
int main()
{
if ('\0' == '\0x' )
printf("both are same\n");
printf("%d",'\0x');
}
Compilation Warnings
test.c: In function ‘main’:
test.c:5:14: warning: multi-character character constant [-Wmultichar]
5 | if ('\0' == '\0x' )
| ^~~~~
test.c:8:14: warning: multi-character character constant [-Wmultichar]
8 | printf("%d",'\0x');
| ^~~~~
Output
120
fseek(fp, 0, SEEK_END); ftell(fp); -- This should not be used to determine the file sizes. The behavior of the fseek() with SEEK_END is undetermined in the case of binary files. Please refer to this link Do not use fseek() and ftell() to compute the size of a regular file
Some Logic Errors
1) You should compute the file size every time in find_min() as it gets changed whenever you write data to the file.
2) fwrite()won't actually dump the data to file immediately. you need to call fflush().
After resolving the above issues, this is the modified code.
Code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include <sys/stat.h>
struct file_t
{
FILE* f;
int size;
}t[5];
void close_file(struct file_t* f) {
if (f == NULL || f->f == NULL) {
}
else {
fclose(f->f);
}
}
int open_file(struct file_t* f, const char* filename) {
if (f == NULL || filename == NULL) {
return 1;
}
f->f = fopen(filename, "a");
if (f->f == NULL)
return 2;
struct stat statbuf;
fstat(fileno(f->f), &statbuf);
f->size = statbuf.st_size;
return 0;
}
struct file_t* find_min(const struct file_t* files, int size) {
if (files == NULL || size <= 0) {
return NULL;
}
struct stat statbuf;
fstat(fileno(files->f), &statbuf);
int x = statbuf.st_size, i = 0, index = 0;
for (i = 0; i < size; i++) {
fstat(fileno((files+i)->f), &statbuf);
if (statbuf.st_size < x) {
x = statbuf.st_size;
index = i;
}
}
return (struct file_t*)(files + index);
}
int main() {
puts("Input files' names:");
char tab[100];
int num = 0;
while(1){
int c;
while (1) {
c = getc(stdin);
if (c == EOF || c == ' ')
goto user_input;
if(c != '\n')
break;
}
tab[0] = c;
if (scanf("%[^\n]s", tab+1) == EOF)
break;
if (*tab == '\0') {
if (num == 0) {
printf("Couldn't open file");
return 4;
}
break;
}
if (strlen(tab) > 30) {
*(tab + 30) = '\0';
}
if (open_file((t + num), tab) > 0) {
}
else {
num++;
}
*tab = '\0';
}
user_input:
if (num == 0) {
printf("Couldn't open file");
return 4;
}
fflush(stdin);
char str[1000];
printf("Input text:\n");
*str = '\0';
while(1) {
int c;
while(1) {
c = getc(stdin);
if (c == EOF)
goto main_exit;
if (c != '\n')
break;
}
str[0] = c;
if (scanf("%[^\n]s", str+1) == EOF)
break;
struct file_t* p = find_min(t, num);
fwrite(str, sizeof(char), strlen(str), p->f);
fflush(p->f);
}
main_exit:
for (int i = 0; i < num; i++) {
close_file(t + i);
}
printf("File saved");
return 0;
}
Terminal Session
$ ./a.out
Input files' names:
test file1.txt
test file2.txt
' '(NOTE: Space character inputted before pressing enter.)
Input text:
this is
stackoverflow
File saved
test file1.txt
this is
test file2.txt
stackoverflow
Note for breaking from the first loop (Files input). You need to enter space and then press enter (You can tweak around this).
Where are you updating the file_t->size when you write into a file?
You are calling this:
fwrite(str, sizeof(char), strlen(str), p->f);
But after that you should do p->size += strlen(str) to update its size, otherwise all file sizes are set to initial values, and hence all strings get written to a single file.
As for getting garbage data, try printing the string you are reading from scanf in the while loop.
You are using scanf to read characters until '\n', but you are not reading the '\n' itself. You need a fseek(stdin, 0, SEEK_END); in that loop as well.
Finally, why are you using syntax like this:
(files + i)->size
When you can call it more cleanly like this:
files[i].size
You code is really hard to read because of this.

Is there any way to shift content of a file without storing it in array in c?

I am trying to replace words from a file, This works fine with words of the same length.
I know it can be done by storing content in a temporary array and then shifting but I was wondering if it can be done without using array.
#include<stdio.h>
#include<string.h>
int main(int argc, char **argv)
{
char s1[20], s2[20];
FILE *fp = fopen(argv[1], "r+");
strcpy(s1, argv[2]);
strcpy(s2, argv[3]);
int l, i;
while(fscanf(fp, "%s", s1)!=EOF){
if(strcmp(s1, argv[2]) == 0){
l = strlen(s2);
fseek(fp, -l, SEEK_CUR);
i=0;
while(l>0){
fputc(argv[3][i], fp);
i++;
l--;
}
}
}
}
Here is my code for replacing same length words, what can I modify here for different lengths?
Assuming that the OP's goal is to avoid storing the whole content of the file into a byte array (maybe not enough memory) and he also said that it needs to "shift" the file's content, so it cannot use a temp file to make the text replacement (perhaps not enough room in the storage device).
Note that copying into a temp file would be the easiest method.
So as I can see the solution has two algorithms:
Shift to left: Replace a text with another of equal or smaller length.
Shift to right: Replace a text with a longer one.
Shift to left:
Maintain 2 file position pointers: one for the read position (rdPos) and another for the write position (wrPos).
Both start in zero.
read char from rdPos until find the oldText and write it into the wrPos (but only if rdPos != wrPos to avoid unnecessary write operations).
write the newText into wrPos.
repeat from step 3 until EOF.
if len(oldText) > len(newText) then truncate the file
Shift to right:
Maintain 2 file position pointers: (rdPos and wrPos).
scan the whole file to find the number of the oldText occurrences.
store their file positions into a small array (not strictly needed, but useful to avoid a second reverse scan of the oldText)
set rdPos = EOF-1 (the last char in the file)
set wrPos = EOF+foundCount*(len(newText)-len(oldText)): reserving enough extra space for the shifting.
read char from rdPos until find the position in the "found" array and write the char into the wrPos.
write the newText into wrPos.
repeat from step 6 until BOF.
I wrote the following implementation as an example of the mentioned algorithms, but without caring too much about validations and edge cases.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#define MAX_ITEMS 100
#define DO_WRITE 0x01
#define DO_FIND 0x02
FILE *fp;
long rdPos = 0L, wrPos = 0L, rdCount=0L, wrCount=0L;
int newLen, oldLen;
char *newText, *oldText;
struct found_t { int len; long pos[MAX_ITEMS];} found;
/* helper functions */
void writeChars(char *buffer, int len){
if(wrPos < rdPos){
long p = ftell(fp);
fseek(fp, wrPos, SEEK_SET);
fwrite(buffer, len, 1, fp);
fseek(fp, p, SEEK_SET);
wrCount += len;
}
wrPos += len;
}
int nextReadChar = -1;
int readChar(){
int c;
if(nextReadChar == EOF) {
if((c = fgetc(fp)) != EOF)
rdCount++;
} else {
c = nextReadChar;
nextReadChar = EOF;
}
return c;
}
int findFirstChar(int action){
int c; char ch;
for(; (c = readChar()) != EOF && c != (int)oldText[0]; rdPos++)
if(action == DO_WRITE) {
ch = (char)c;
writeChars(&ch, 1);
}
return c;
}
int testOldText(int c, int action){
char *cmp;
for(cmp = oldText; *cmp != '\0' && c == (int)*cmp; cmp++)
c = readChar();
nextReadChar = c;
if(*cmp == '\0') { /* found oldText */
if(action == DO_FIND)
found.pos[found.len++] = rdPos;
rdPos += oldLen;
if(action == DO_WRITE){
writeChars(newText, newLen);
found.len++;
}
}
else { /* some chars were equal */
if(action == DO_WRITE)
writeChars(oldText, cmp-oldText);
rdPos += cmp-oldText;
}
return c;
}
void writeReverseBlock(long firstCharPos){
for(;rdPos >= firstCharPos+oldLen; rdPos--, wrPos--, rdCount++, wrCount++){
int c;
fseek(fp, rdPos, SEEK_SET); c = fgetc(fp);
fseek(fp, wrPos, SEEK_SET); fputc(c, fp);
}
rdPos = firstCharPos-1;
wrPos -= newLen-1;
fseek(fp, wrPos--, SEEK_SET);
fwrite(newText, newLen, 1, fp);
wrCount += newLen;
}
void scanFile(int action){
int c;
do {
if( (c = findFirstChar(DO_WRITE)) == EOF ) break;
}while(testOldText(c, DO_WRITE) != EOF);
}
/** Main Algorithms */
void shiftToLeft(){
scanFile(DO_WRITE);
fflush(fp);
ftruncate(fileno(fp), wrPos);
}
void shiftToRight(){
int i;
scanFile(DO_FIND);
wrPos = --rdPos + found.len * (newLen-oldLen); /* reserve space after EOF */
for(i=found.len-1; i>=0; i--)
writeReverseBlock(found.pos[i]);
}
/* MAIN program */
int main(int argc, char **argv){
if(argc != 4){
fprintf(stderr, "Usage: %s file.ext oldText newText\n", argv[0]);
return 1;
}
if(!(fp = fopen(argv[1], "r+b"))) {
fprintf(stderr, "Cannot open file '%s'\n", argv[1]);
return 2;
}
oldLen = strlen(oldText = strdup(argv[2]));
newLen = strlen(newText = strdup(argv[3]));
found.len = 0;
/* which algorithm? */
if(newLen <= oldLen) shiftToLeft();
else shiftToRight();
fclose(fp);
printf("%7d occurrences\n"
"%7ld bytes read\n"
"%7ld bytes written\n", found.len, rdCount, wrCount);
return 0;
}

How to split a text file into multiple parts in c

What i need to do, is to take a file of n lines, and for every x lines, create a new file with the lines of the original file. An example would be this:
Original File:
stefano
angela
giuseppe
lucrezia
In this case, if x == 2, 3 file would be created, in order:
First file:
stefano
angela
Second FIle:
giuseppe
lucrezia
Third File:
lorenzo
What i've done so far is this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * filename = malloc(sizeof(char)*64);
char * ext = ".txt";
char number[2];
for(int i = ix; i < *mem; i++)
{
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && (ix*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}
I'm stack with segmentation error, i couldn't find the bug doing
gdb myprogram
run
bt
I really appreciate any help.
EDIT:
I've changed some things and now it works, but it creates an additional file that contains strange characters. I need to still adjust some things:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * ext = ".txt";
for(int i = ix; i < *mem; i++)
{
char * filename = malloc(sizeof(char)*64);
char * number = malloc(sizeof(char)*64);
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
free(number);
free(filename);
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && ((ix-1)*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}
There are a few problems in your code. But first I think you need to fix the most important thing
int step = lines/N;
Here step is 0 if your input file has less than N lines of text. This is because lines and N both are integer and integer division is rounding down.
I won't fix your code, but I'll help you with it. Some changes I
suggest:
Instead of getlines, use getline(3) from the standard
library.
fseek(fp, 0 , SEEK_SET) is pointless.
In char * filename = malloc(sizeof(char)*64), note that
both arguments to malloc are constant, and the size is arbitrary.
These days, it's safe to allocate filename buffers statically,
either on the stack or with static: char filename[PATH_MAX].
You'll want to use limits.h to get that constant.
Similarly you have no need to dynamically allocate your FILE
pointers.
Instead of
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
use sprintf(filename, "temp%d%s", i+1, ext)
get familiar with err(3) and friends, for your own convenience.
Finally, your recursive Split is -- how shall we say it? -- a nightmare. Your whole program
should be something like:
open input
while getline input
if nlines % N == 0
create output filename with 1 + n/N
open output
write output
nlines++

fgetc misses bytes while reading a file

I use the function fgetc to read each byte of a file, and then write it with printf.
I just noticed that sometimes, fgetc just miss some bytes, when I compare my result with a hex editor.
For example, the first mistake starts around the 118th byte, and a lot of other mistakes randomly ...
Somebody ever experienced this?
This is the code (Windows)
char main(int argc, char* argv[]) {
FILE* fdIn;
FILE* fdOut;
long size = 0;
long i = 0;
char c = 0;
if (argc == 3) {
if ((fdIn = fopen(argv[1], "rt")) == NULL) {
printf("FAIL\n");
return 0;
}
if ((fdOut = fopen(argv[2], "w+")) == NULL) {
printf("FAIL\n");
return 0;
}
fseek(fdIn, 0L, SEEK_END);
size = ftell(fdIn);
fseek(fdIn, 0L, 0);
fprintf(fdOut, "unsigned char shellcode[%ld] = {", size);
while (i < size) {
c = fgetc(fdIn);
if (!(i % 16))
fprintf(fdOut, "\n\t");
fprintf(fdOut, "0x%02X", (unsigned char)c);
if (i != size - 1)
fprintf(fdOut, ", ");
i++;
}
fprintf(fdOut, "\n};\n");
fclose(fdIn);
fclose(fdOut);
printf("SUCCESS");
system("PAUSE");
}
return 0;
}
Open the file in binary mode.
// if ((fdIn = fopen((char*)argv[1], "rt")) == NULL) {
// >.<
if ((fdIn = fopen((char*)argv[1], "rb")) == NULL) {
In text mode, and likely a Windows based machine given the "rt", a '\r', '\n' pair is certainly translated into '\n'. IAC, no translations are needed for OP's goal of a hex dump.
2nd issue: fgetc() returns an int in the range of unsigned char or EOF. Use type int to distinguish EOF from all data input.
// char c = 0;
int c = 0;
...
c = fgetc(fdIn);
// also add
if (c == EOF) break;

C: write (and then read) a string to file using a hash function to locate the byte's position

I want to write up to 1KB into a file, and I want to use a hash function to calculate the position of the byte I'm going to write. Obviously I also want to be able later to retrieve the data in the correct order.
To locate the position of the byte to read/write I use the hash function
f = (index*prime) mod 1024
where index is the index in the string, and prime is a prime number that I need to avoid collisions, i.e. not rewriting two times in the same position.
f, strlen (b
First I create the file
dd bs=1024 count=1 if=/dev/zero of=test.fs
and after that I compile and run my program passing "w" as parameter
$ ./a.out w
Now, it seems to me that the write() function does its job correctly, because if I do...
$ cat test.fs
or
$ hexdump test.fs
... I can see the content of the file is consistent with the string I inserted!
However, if I run it into read-mode by passing "r" as parameter, I get always a curious random output, it seems to me like if I'm reading thrash from memory. I cannot understand where the read() function fails, thank you in advance.
The C code follows:
#include <stdio.h>
#include <string.h>
#define PRIME 7919
int main(int argc, char *argv[])
{
int mode;
if (argc < 2)
return 1;
else if (strcmp (argv[1], "r") == 0)
mode = 1;
else if (strcmp (argv[1], "w") == 0)
mode = 2;
else
return 1;
FILE *fs;
char buf[1024];
int val;
int i;
char c;
if (mode == 1) {
fs = fopen("test.fs", "rb");
val = read (buf, 1024, fs);
if (val != 1024)
fprintf(stderr, "Either an error occurred, or the EOF was reached.\n");
printf("Content read: %s\n", buf);
}
else if (mode == 2) {
fs = fopen("test.fs", "wb");
printf ("Enter a string you want write to disk: ");
while ((c = getchar()) != EOF)
buf[i++] = c;
buf[i] = EOF;
val = write (fs, buf, strlen (buf));
if (val == EOF)
fprintf(stderr, "An error occurred while writing.\n");
printf("%d bytes written to disk.\n");
}
fclose (fs);
}
int write(FILE *f, char *str, long len)
{
int i;
int err;
for (i=0; i < len && err != EOF; i++) {
fseek(f, (i*PRIME)%1024, SEEK_SET);
err = fputc(str[i], f);
}
if (err != EOF)
return i;
else
return EOF;
}
int read(char *buffer, long len, FILE *f)
{
int i;
int br = 1;
for (i=0; i < len && br != 0; i++) {
fseek(f, (i*PRIME)%1024, SEEK_SET);
br = fread (&buffer[i],1,1,f);
}
if (ferror (f))
return EOF;
else
return i;
}
buffer is not a string, it is an array of characters. When you incorrectly print it as a string:
printf("Content read: %s\n", buf);
you get a buffer overflow since your array isn't null terminated.

Resources