reverse content of file block by block - c

So here is the program to reverse content of a file block by block.
#include <stdio.h>
#include <stdlib.h>
#define BS 12
void reverse(char * buffer, int size)
{
char tmp;
int i;
for(i = 0; i < size / 2; i++)
{
tmp = (char)buffer[i];
buffer[i] = buffer[size - i - 1];
buffer[size - i - 1] = tmp;
}
}
int main (const int argc, const char** argv)
{
if(argc != 3)
exit(-1);
char * buffer = malloc(BS);
FILE * f1, * f2;
f1 = fopen(argv[1], "r");
f2 = fopen(argv[2], "w");
fseek(f1, 0, SEEK_END);
long i = ftell(f1);
// long f1_len = ftell(f1);
// unsigned char tmp;
int if_end = 1;
int need = 0;
int count;
do
{
i = i - BS;
if(i < 0)
{
need = BS - abs(i);
i = 0;
}
else
need = BS;
fseek(f1, i, SEEK_SET);
if(if_end) // strip EOF
{
count = fread(buffer, need - 1, 1, f1);
if_end = 0;
}
else
count = fread(buffer, need, 1, f1);
reverse(buffer, count);
fwrite(buffer, count, 1, f2);
if(i == 0)
break;
}while(i > 0);
fclose(f1);
fclose(f2);
free(buffer);
return 0;
}
testfile:
$ xxd testfile
0000000: 6162 6364 6566 670a abcdefg.
$ gcc test.c -o test
$ ./test testfile testfile2
$ xxd testfile2
0000000: 61 a
Any idea where is wrong? I have been debugging this for long time.

Your problem is that fwrite returns the number of successful blocks, not the number of bytes.
So reverse(buffer, count); needs to be reverse(buffer,count * need)
Similairly the write to the output needs to be fwrite(buffer, count * need, 1, f2);

Related

File overflowing into allocated memory C

I am trying to write a basic mutant testing script in C, but I've come across some errors which I cannot seem to solve. The first thing to comment is that in the function that seems to be having problems, namr, I try am trying to name the file I am creating using a simple caesar cypher to avoid having unwanted characters in the file name. When I run it as is, it seems as though the strings cexp and mcexp are sometimes getting content from a file I am reading in another function switchr.
When I add the printf at Annotation 1 it seems to run fine but the filenames come out wrong. Still, if I comment Annotation 1 out, there is a malloc(): corrupted top size error. I have tried various prints to see what is wrong. By the time it gets to Annotation 2, cexp and mcexp are still the desired length and content but, by the time they get to Annotation 3, they're 26 or 25 characters long and include the starting lines of the file I am reading in other parts of the script.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *namr(char *exp, char *mexp, int ignv) {
int explen = strlen(exp);
int mexplen = strlen(mexp);
//printf("EXPLEN: %d MEXPLEN: %d\n",explen,mexplen);
//ANNOTATION 1
char *cexp = (char *)malloc(explen + 1);
char *cmexp = (char *)malloc(mexplen + 1); //Exp in Caeser Cipher
for (int i = 0; i < explen; i++) {
cexp[i]= (exp[i] ? 'A' + exp[i] % 25 : exp[i]);
printf("%d - %c - %c\n", i, exp[i], 'A' + exp[i] % 25);
//ANNOTATION 2
}
for (int i = 0; i < mexplen; i++) {
cmexp[i]= (mexp[i] ? 'A' + mexp[i] % 25 : mexp[i]);
}
printf("EXP: %s\nMEXP: %s\n", exp, mexp);
printf("CEXP: %s\nCMEXP: %s\n", cexp, cmexp);
//ANNOTATION 3
printf("%s - %d\n%s - %d\n%d\n", cexp, strlen(cexp),
cmexp, strlen(cmexp), strlen("./U_SWITCH_MTNTS/TO%03.c"));
char *outname = (char *)malloc((30 + explen + mexplen));
sprintf(outname, "./U_SWITCH_MTNTS/%sTO%s%03d.c", cexp, cmexp, ignv);
free(cexp);
free(cmexp);
return outname;
}
int countr(char *filename, char *exp) {
int out = 0;
int i, flag;
int inlen = strlen(exp);
char c;
FILE *f = fopen(filename, "r");
while (c != EOF) {
for (i = 0, flag = 0; i < inlen; i++) {
if (exp[i] != c) {
flag = 1;
break;
}
c = getc(f);
}
if (flag == 0)
out++;
c = getc(f);
}
fclose(f);
return out;
}
char *switchr(char *filename, char *exp, char *mexp, int ignv) {
int i, flag,buffcount;
FILE *f = fopen(filename, "r");
char *outname = namr(exp, mexp, ignv);
FILE *fout = fopen(outname, "w");
char c = getc(f);
int ignc = ignv;
int inlen = strlen(exp);
char *buffer = (char *)malloc(inlen * sizeof(char));
while (c != EOF) {
for (i = 0, flag = 0, buffcount = 0; i < inlen; i++) {
if (exp[i] != c) {
flag = 1;
break;
} else {
buffer[buffcount] = c;
buffcount++;
c = getc(f);
}
}
if (flag == 0) {
if(ignc == 0) {
fputs(mexp, fout);
} else {
for (i = 0; i < buffcount; i++)
fputc(buffer[i], fout);
}
ignc--;
} else {
for (i = 0; i < buffcount; i++)
fputc(buffer[i], fout);
}
fputc(c, fout);
c = getc(f);
}
fclose(f);
fclose(fout);
return outname;
}
void mstrswitch(char *filename) {
int ecount = countr(filename, "==");
char **filenames = (char **)malloc(5 * ecount * sizeof(char *));
char command[100];
system("mkdir U_SWITCH_MTNTS");
system("mkdir TEST_OBJECTS");
for (int i = 0;i < ecount; i++) {
filenames[5 * i] = switchr("test.c", "==", "<=", i);
filenames[5 * i + 1] = switchr("test.c", "==", ">=", i);
filenames[5 * i + 2] = switchr("test.c", "==", ">", i);
filenames[5 * i + 3] = switchr("test.c", "==", "<", i);
filenames[5 * i + 4] = switchr("test.c", "==", "!=", i);
}
for (int i = 0; i < 5 * ecount; i++) {
sprintf(command, "gcc -o ./TEST_OBJECTS/test%03d %s", i, filenames[i]);
system(command);
sprintf(command, "./TEST_OBJECTS/test%03d", i);
system(command);
free(filenames[i]);
}
free(filenames);
}
int main() {
mstrswitch("test.c");
return 0;
}
You never zero terminates the strings cexp and cmexp. So add these two lines:
for(int i=0;i<explen;i++)
{
cexp[i]= (exp[i]?'A'+exp[i]%25: exp[i]);
printf("%d - %c - %c\n",i,exp[i],'A'+exp[i]%25);
}
cexp[explen]= '\0'; <------------------- add
for(int i=0;i<mexplen;i++)
{
cmexp[i]= (mexp[i]?'A'+mexp[i]%25: mexp[i]);
}
cmexp[mexplen]= '\0'; <------------------- add
Besides that the following line looks strange:
cexp[i]= (exp[i]?'A'+exp[i]%25: exp[i]);
^^^^^^
Always true
Having a condition that always returns true is probably not what you intended.

searching if arr is in a binary file c

I have a project; a kind of a anti-virus, to search whether a virus is in a file. My idea was to get a length of the virus and jump by one every time I reach the the length but I don't know how to do it exactly.
int virusChaek(FILE* file, char virus[],char filesNames[], int len)
{
char* pch;
int sizeOfFile = 0;
int i = 0;
int arr[100] = {0};
int* myArr;
int isVirus = 0;
file = fopen(filesNames, "rb");
if (file == NULL)
{
printf("file does not exist.");
return 1;
}
fseek(file, 0, SEEK_END);
sizeOfFile = ftell(file);
fseek(file, 0, SEEK_SET);
myArr = (int*)malloc(sizeof(int) * strlen(virus));
while (0 < (sizeOfFile))
{
fread(arr, 1, 1, file);
printf("arr[0] = %d\n", arr[0]);
myArr[i] = arr[0];
printf("myArr = %d\n", myArr[i]);
sizeOfFile--;
i++;
}
free(myArr);
fclose(file);
return 0;
}

Sort ints from a txt file

I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}

file size exceeds buffer size

I want to compare 2 files for identical lines: mytab2411.txt(15,017,210 bytes in size) and shadow.txt (569 bytes in size) but when I compiled this code and ran the program, I get a segmentation fault. I know that it's because the "mytab2411.txt" file exceeds the size of "char buf" but how do I go about solving this problem without overflowing the buffer?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("shadow.txt", "rt");
f2 = fopen("mytab2411.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}
Your method assumes that each line in the file is 1024 bytes long. In practice the lines can be up to 1024 bytes, but most lines are much shorter. Use strdup or malloc to allocate memory for each line based on line's length.
Store the lines in dynamically allocated arrays. This is about 15 MB of data and it should not be a problem unless there are resource limitations.
int main(void)
{
char buf[1024];
char **arr1 = NULL;
char **arr2 = NULL;
int size1 = 0;
int size2 = 0;
FILE * f1, *f2;
f1 = fopen("shadow.txt", "r");
f2 = fopen("mytab2411.txt", "r");
while(fgets(buf, 1024, f1))
{
size1++;
arr1 = realloc(arr1, sizeof(char*) * size1);
arr1[size1 - 1] = strdup(buf);
}
while(fgets(buf, 1024, f2))
{
size2++;
arr2 = realloc(arr2, sizeof(char*) * size2);
arr2[size2 - 1] = strdup(buf);
}
for(int i = 0; i < size1; i++)
for(int j = 0; j < size2; j++)
{
if(strcmp(arr1[i], arr2[j]) == 0)
printf("match %s\n", arr1[i]);
}
return 0;
}

find special cases while comparing files in reverse order

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BS 12
void reverse(char * buffer, int size)
{
char tmp;
int i;
for(i = 0; i < size / 2; i++)
{
tmp = (char)buffer[i];
buffer[i] = buffer[size - i - 1];
buffer[size - i - 1] = tmp;
}
}
int compare_bin(char * buffer, char * buffer2, int size)
{
// because strncmp is only for string without \x00, so there must be a customized compare function
int i;
for(i = 0; i < size; i++)
{
if(buffer[i] != buffer2[i])
return 0;
}
return 1;
}
int main (const int argc, const char** argv)
{
if(argc != 3)
exit(-1);
int equal = 1;
char * buffer = malloc(BS), * buffer2 = malloc(BS);
FILE * f1, * f2;
f1 = fopen(argv[1], "r");
f2 = fopen(argv[2], "r");
fseek(f1, 0, SEEK_END);
fseek(f2, 0, SEEK_END);
long i = ftell(f1), j = ftell(f2);
if(i != j)
{
equal = 0;
goto endp;
}
fseek(f2, 0, SEEK_SET);
int need = 0;
int count;
int f2_pos = 0;
do
{
i = i - BS;
if(i < 0)
{
need = BS - abs((int)i);
i = 0;
}
else
need = BS;
fseek(f1, i, SEEK_SET);
count = fread(buffer, need, 1, f1);
reverse(buffer, count * need);
// fwrite(buffer, count * need, 1, f2);
fread(buffer2, need * need, 1, f2);
// printf("compare...\n");
// for(int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer[i]);
// }
// printf("\n");
// for (int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer2[i]);
// }
// printf("\n");
if(compare_bin(buffer, buffer2, need * count) == 0)
{
equal = 0;
break;
}
f2_pos += need * count;
fseek(f2, f2_pos, SEEK_SET);
if(i == 0)
break;
}while(i > 0);
fclose(f1);
fclose(f2);
free(buffer);
free(buffer2);
endp:
if(equal)
return 0;
else
{
printf("2 files not equal is reversed order\n");
return 1;
}
return 0;
}
So I write a program to compare file content in reverse order. I have already considered \x00 in binary file and strncmp isn't used. But there is still flaw. There is a test server to test this program. But I dont have access to it. This program always fails on that server. So there must be some special cases to make it fail. Any idea?
There are other ways around it. For instance, calculating MD5. But I want to fix this.
For the very first iteration where you read data you have
fread(buffer2, need * need, 1, f2);
The problem is that in that case need is 12, which is the size of the memory allocated for buffer2, but you ask to read 12 * 12 bytes.
If the second file is large enough, you will write out of bounds in the memory, leading to undefined behavior. If the file is not large enough then you won't read anything.
Also note that the order of the two middle arguments to fread matter. If you changed the order you would write out of bounds of the buffer both if the file is larger than need * need or not. You should really read count byte-sized object (the second argument should be 1 and the third should be count, which of course mean you need to change the order in the first call as well).
In short, your two fread calls should be
count = fread(buffer, 1, BS, f1);
fread(buffer2, 1, count, f2);
PS. Don't forget error checking.

Resources