split file in c error get buffer in readfile - c

I program a program to split file in C in Ubuntu.
I have error when get buffer in readfile.
here is my code.
int split(char *filename, unsigned long part) {
FILE *fp;
char *buffer;
size_t result; // bytes read
off_t fileSize;
fp = fopen(filename, "rb");
if (fp == NULL) {
fprintf(stderr, "Cannot Open %s", filename);
exit(2);
}
// Get Size
fileSize = get_file_size(filename);
// Buffer
buffer = (char*) malloc(sizeof(char) * (fileSize + 1));
if (buffer == NULL) {
fputs("Memory error", stderr);
fclose(fp);
return 1;
}
// Copy file into buffer
//char buffers[11];
result = fread(buffer, 1, fileSize, fp);
buffer[fileSize] = '\0';
if (result != fileSize) {
fputs("Reading error", stderr);
return 1;
}
// Split file
off_t partSize = fileSize / part;
// Last Part
off_t lastPartSize = fileSize - partSize * part;
unsigned long i;
unsigned long j;
// create part 1 to n-1
for (j = 0; j < part; j++) {
char partName[255];
char *content;
char partNumber[3];
// Content of file part
// for (i = j; i < partSize * (j + 1); i++) {
//
// }
content = (char*) malloc(sizeof(char) * partSize);
content = copychar(buffer, j + i, partSize + i);
i += partSize;
//copy name
strcpy(partName, filename);
// part Number
sprintf(partNumber, "%d", j);
// file name with .part1 2 3 4 ....
strcat(partName, ".part");
strcat(partName, partNumber);
// Write to file
writeFile(partName, content);
free(content);
}
// last part
char *content;
content = (char*) malloc(sizeof(char) * (fileSize - partSize * (part - 1)));
content = copychar(buffer, (part - 1) * partSize + 1, fileSize);
char lastPartNumber[3];
char lastPartName[255];
sprintf(lastPartNumber, "%d", part);
strcpy(lastPartName, filename);
strcat(lastPartName, ".part");
strcat(lastPartName, lastPartNumber);
writeFile(lastPartName, content);
free(content);
free(buffer);
fclose(fp);
return 0;
}
here is function copychar from start to end
char *copychar(char* buffer, unsigned long start, unsigned long end) {
if (start >= end)
return NULL;
char *result;
result = (char*) malloc(sizeof(char) * (end - start) + 1);
unsigned long i;
for (i = start; i <= end; i++)
result[i] = buffer[i];
result[end] = '\0';
return result;
}
here is function to get filesize
off_t get_file_size(char *filename) {
struct stat st;
if (stat(filename, &st) == 0)
return st.st_size;
fprintf(stderr, "Cannot determine size of %s: %s\n", filename);
return -1;
}
here is function to write file
int writeFile(char* filename, char*buffer) {
if (buffer == NULL || filename == NULL)
return 1;
FILE *file;
file = fopen(filename, "wb");
fwrite(buffer, sizeof(char), sizeof(buffer) + 1, file);
fclose(file);
return 0;
}
When I test I use file test 29MB and it dumped.
I debug It return fileSize true but when readfile in buffer get from file it only return 135 characters and when use copychar it error.
Breakpoint 1, 0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary 1342169369 0 0 100644 4 `\n2.0\ncontrol.tar.gz 1342169369 0 0 100644 4557 `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43
Program received signal SIGSEGV, Segmentation fault.
0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary 1342169369 0 0 100644 4 `\n2.0\ncontrol.tar.gz 1342169369 0 0 100644 4557 `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43
Program terminated with signal SIGSEGV, Segmentation fault.
The program no longer exists.
I don't know how to devide buffer into part to write into part when split.
Thank for advance!

It's highly impractical to copy files in 1 big block as you may have noticed. And it's not needed.
At the simplest level you could copy the file byte by byte, like this
while( ( ch = fgetc(source) ) != EOF ) {
fputc(ch, target);
}
Which will work, but it will be quite slow. Better to copy in blocks, like this:
unsigned char buf[4096];
size_t size;
while( (size = fread(buf, 1, sizeof(buf), fpRead) ) > 0) {
fwrite(buf, 1, size, fpWrite);
}
Notice that the resulting code is way simpler and contains no dynamic memory allocation.
You still need to add the splitting logic of course, but that can be done by tracking the number of bytes written and opening a new write-file before actually writing it.
EDIT: how to handle the multipart facet - schematically, you still need to implement extra checks for some special cases and test results of the different system calls of course
unsigned char buf[4096];
size_t size;
size_t partsize = 100000; // asssuming you want to write 100k parts.
size_t stilltobewritten = partsize; // bytes remaining to be written in current part
size_t chunksize = sizeof(buf); // first time around we read full buffersize
while( (size = fread(buf, 1, chunksize, fpRead) ) > 0) {
fwrite(buf, 1, size, fpWrite);
stilltobewritten -= size; // subtract bytes written from saldo
if (stilltobewritten == 0) {
// part is complete, close this part and open next
fclose(fpWrite);
fpWrite = fopen(nextpart,"wb");
// and reinit variables
stilltobewritten = partsize;
chunksize = sizeof(buf);
} else {
// prep next round on present file - just the special case of the last block
// to handle
chunksize = (stilltobewritten > sizeof(buf)) ? sizeof(buf) : stilltobewritten;
}
}
and EDIT 2: the file part name can be made a LOT simpler as well:
sprintf(partName, "%s.part%d",file, j);

concerning the original code, there's some confusion about start and end in the copychar. First, you probably meant sizeof(char) * (end - start + 1) rather than sizeof(char) * (end - start) + 1 in the malloc, second, you're copying end-start+1 symbols from the original buffer (for (i = start; i <= end; i++)) and then overwrite the last one with '\0', which probably isn't the intended behavior.

Related

Checking if word is included in file

I wrote a function which should check if a word is included in a file, but my function returns always NOT_EXISTENT, why? I checked ptr and its always empty but the memory is located.
Here my function:
int search_for_word(char wort[]) {
char *ptr;
FILE *file;
unsigned long size_of_file = 0;
file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
fseek(file, 0L, SEEK_END);
size_of_file = ftell(file);
ptr = malloc(sizeof(char) * size_of_file + 1);
printf("Size:%li\n", size_of_file);
if (ptr == NULL) {
return ERROR;
}
fread(ptr, sizeof(char), size_of_file, file);
if (strstr(ptr, wort) == NULL) {
return NOT_EXISTENT;
}
fclose(file);
return EXISTENT;
}
At least these problems:
(Biggest issue) Missing rewind #alinsoar
fread() is attempting a read from the end of the file. Move back to the beginning.
rewind(file); // Add
size_t length = fread(ptr, sizeof(char), size_of_file, file);
Not a string #pm100
ptr is not certainly a string as it may lack a null character. strstr() expects 2 strings.
strstr(ptr, wort) // bad
Instead, append a null character to the data read before strstr().
size_t length = fread(ptr, sizeof(char), size_of_file, file);
ptr[length] = '\0'; // Add
Failure to close
Code selectively performs fclose(file). Call fclose() with each successful fopen().
Missing free() #Weather Vane
Free allocated memory when done.
wort[] may be ill formed
Posted code does not show the origin of wort[]. So recommendations are guesses at best.
No check on fseek() success
// fseek(file, 0L, SEEK_END)
if (fseek(file, 0 /* L not needed */, SEEK_END) == -1) {
Handle_error();
}
Better with a const #chqrlie
This allows passing constant strings.
// int search_for_word(char wort[]){
int search_for_word(const char wort[]) {
Minor
Size sizeof(char) * size_of_file + 1 may exceed SIZE_MAX.
sizeof(char) * size_of_file + 1 conceptually wrong. Better as sizeof(char) * (size_of_file + 1) or just size_of_file + 1u.
Some rough alternative code - unchecked.
// Let calling code open the file
// Return 1 on success.
// Return 0 on no-find.
// Return -1 on other failures.
int search_for_word(const char *word, FILE *inf) {
if (inf == 0) {
return -1;
}
size_t length_word = strlen(word);
if (length_word >= SIZE_MAX / 2) {
return -1; // TBD code to handle this extreme case
}
size_t buf_size = 4096; // Adjust as desired
if (buf_size <= length_word * 2) {
buf_size = length_word * 2 + 1;
}
char *buf = malloc(buf_size);
if (buf == NULL) {
return -1;
}
char *in = buf;
size_t in_length = 0;
for (;;) {
size_t length_read = fread(in, 1, buf_size, inf);
in[length_read] = '\0';
if (strstr(buf, word)) {
free(buf);
return 1;
}
if (length_read < buf_size) { // no more data expected
free(buf);
return 0;
}
// Copy last portion of buffer to the beginning.
in_length += length_read;
memmove(buf, &buf[in_length - length_word], length_word);
in_length = length_word;
in = buf + in_length;
}
}
Here is a modified version implementing suggestions from chux's answer and with an alternative method for huge files (which should probably be used for all files):
int search_for_word(const char *wort) {
int res = NOT_EXISTENT;
FILE *file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
#if 0 // set to 1 if you want to load the whole file in memory
if (fseek(file, 0L, SEEK_END) == -1) {
fclose(file);
return ERROR;
}
long size_of_file = ftell(file);
if (size_of_file < 0) {
fclose(file);
return ERROR;
}
rewind(file);
if ((unsigned long)size_of_file + 1 <= SIZE_MAX) {
char *ptr = malloc((size_t)size_of_file + 1);
if (ptr != NULL) {
size_t length = fread(ptr, 1, size_of_file, file);
ptr[length] = '\0';
res = strstr(ptr, wort) ? EXISTENT : NOT_EXISTENT;
free(ptr);
fclose(file);
return res;
}
}
#endif
/* use a different method: read 4KB at a time */
size_t len = strlen(wort);
char buf[4096 + len + 1];
size_t nread, pos = 0;
while ((nread = fread(buf + pos, 1, 4096, file)) > 0) {
buf[pos + nread] = '\0';
if (strstr(buf, wort)) {
res = EXISTENT;
break;
}
if (pos + nread <= len) {
pos += nread;
} else {
memmove(buf, buf + pos + nread - len, len);
pos = len;
}
}
fclose(file);
return res;
}

How to encrypt a file buffer with XOR?

I'm trying to read in a file to buffer and encrypt each byte with my XOR encryption key. I have implemented it like the following but it segfaults for some reason.
int main(char argc, char *argv[]) {
fileIn = fopen("data.bin", "rb"); // open input file (binary)
if (fileIn==NULL) {
puts("Error opening input file");
exit (1);
}
// obtain file size.
fseek(fileIn , 0 , SEEK_END);
lSize = ftell(fileIn);
rewind(fileIn);
printf("Filesize: %d bytes.\n", lSize);
// allocate memory to contain the whole file.
buffer = (unsigned char*) malloc (lSize);
if (buffer == NULL) {
puts("malloc for input file buffer failed (not enough memory?)");
exit (2);
}
// copy the file into the buffer.
fread (buffer, 1, lSize, fileIn);
char *enckey = "enckey123";
unsigned char *buf = buffer;
int index = 0;
while (buf < buf + lSize - 1) {
*buf++ ^= enckey[index++ % 9]; // 9 is the length of the encryption key
}
}
It segfaults right at this like *buf++ ^= enckey[index++ % 9];.
Debugging with gdb, I can see that lSize is something like 2000, but index has the value of 128585.
What am I doing wrong?
This loop:
while (buf < buf + lSize - 1) {
never finishes.
Perhaps you meant
while (buf < buffer + lSize) {
?
P.S. the -1 means it doesn't encrypt the last character.
The loop will never end.
I think the loop needs a better way to iterate over only lSize bytes.
I did it by changing the last few lines to this:
// ...
// Calculate where to stop the iteration
const unsigned char *buf_end = buf + lSize - 1;
// Walk over lSize bytes in the buffer
while (buf < buf_end) {
*buf++ ^= enckey[index++ % 9]; // 9 is the length of the encryption key
}
// ...
Similar to other answers, but I think this is more readable:
Instead of this:
while (buf < buf + lSize - 1) {
*buf++ ^= enckey[index++ % 9]; // 9 is the length of the encryption key
}
This:
for (size_t i = 0; i < lSize; i++) {
buf[i] = enckey[i % 9];
}

Why does this loop iterating over a file buffer throw a BAD ACCESS exception?

I've been picking at this for hours and can't figure out why the loop would try to access out of bounds memory.... Any help would be super appreciated!
int CountCharacters(FILE *fp, const char* filename){
// Get file size
fseek(fp, 0L, SEEK_END);
long size = ftell(fp);
if (size == -1){ perror("Failed: "); return 0;}
rewind(fp); //seek back to file's beginning
// Allocate approrpiately sized buffer ( size + 1 for null-termination)
char *buf = malloc(sizeof (char) * (size + 1));
// Read the entire file into memory
size_t newLen = fread(buf, sizeof(char), size, fp);
if ( ferror( fp ) != 0 ) {
fputs("Error reading file", stderr);
} else {
buf[newLen++] = '\0'; /* Just to be safe. */
}
//Try to get byte count from buffer
int byte_count[256] = {0}; //initialize character counts to 0
for (int i = 0; i < size; ++i){
byte_count[(int) buf[i]]++; //BAD ACCESS ERROR HERE
}
/* Do something with byte_count here */
return (1);
}

trying to read a file

basically trying to make an anti virus but all I get when trying to read the infected file into a buffer is EOF... it's a jpg and I have no idea how to fix this
about the file functions I'm allowed to use:
fread/fwrite
fgets
fputs
fclose
fopen
fgetc
fputc
fscanf
fprintf
int fullScan(FILE* sign, FILE* infected);
char* getFile(FILE* file);
int main(int argc, char** argv)
{
FILE* sign = fopen("KittenVirusSign", "rb");
FILE* infected = fopen("kitten_frog.jpg", "rb");
int j = 0;
if (infected == NULL)
{
printf("couldn't open the file (suspicious file)");
return -1;
}
if (sign == NULL)
{
printf("couldn't open the file (virus signature)");
return -1;
}
j = fullScan(sign, infected);
return 0;
}
int fullScan(FILE* sign, FILE* infected)
{
char* sign_c = NULL;
char* infec_c = NULL;
int infect_res = -1;
int sign_len = 0;
int infec_len = 0;
int i = 0;
int j = 0;
sign_c = getFile(sign);
infec_c = getFile(infected);
while (1)
{
if (*(infec_c + i) == *(sign_c + j))
{
infect_res = 1;
if (*(sign_c + j) == EOF)
{
break;
}
else if (*(infec_c + i) == EOF)
{
infect_res = -1;
break;
}
i++;
j++;
continue;
}
else if (*(infec_c + i) != *(sign_c + j))
{
if (*(infec_c + i) == EOF || *(sign_c + j) == EOF)
{
break;
}
i++;
j = 0;
infect_res = -1;
}
}
fclose(infected);
free(sign_c);
free(infec_c);
return infect_res;
}
char* getFile(FILE* file)
{
char* buffer;
long filelen;
int i;
fseek(file, 0, SEEK_END);
filelen = ftell(file);
fseek(file, 0, SEEK_SET);
buffer = (char *)malloc((filelen + 1)*sizeof(char));
for (i = 0; i < filelen; i++)
{
fread(buffer + i, sizeof(char), 1, file);
}
return buffer;
}
EOF is a special integer value returned by some input functions to indicate that the end of the file has been reached, but it is not part of the file data. Your fread() will therefore never store an EOF character into the input buffer you provided. However, if your C implementation features signed default chars, as many do, then there is a char value that is numerically equal to EOF (usually -1).
If either file happens to contain that byte, then your code will misinterpret it as designating the end of that file. If it happens to be the first byte in either file then the program will misinterpret the file as being empty.
Since you are analyzing binary files,
I recommend using buffers of unsigned char rather than default char.
All possible byte values can appear in the file data, so you cannot identify the end of the data by the value of any byte within.
Probably, getFile() should return a struct that contains both a pointer to the buffer and its size.
As other answer suggested, you should also send the file length and iterate over that, rather than waiting for a EOF.
Also, in your getFile() function, when you determine the length of the file you don't have to read byte by byte, you can just send the filelen to fread() like so
fread(buffer, sizeof(char), filelen, file);
fread now reads filelen elements of data each the size of a char (you can write 1 instead) from the stream file to buffer.

Copy file skipping first n lines and last m lines

I want to copy a file with skipping first n of its lines and last m lines using open, read, write and lseek
(eg. n = 1, m = 2, source file:
AAAAAAA
BBBBBBB
CCCCCCC
DDDDDDD
dest file:
BBBBBBB )
I know how to copy a file but don't know how to skip the lines. Here is my code for copy:
char buf[128];
size_t size;
int source = open(argv[1], O_RDONLY);
int dest = open(argv[2], O_CREAT | O_APPEND | O_WRONLY);
if(source == -1) {
printf("error");
return;
}
if(dest == -1) {
printf("error");
return;
}
while((size = read(source, buf, sizeof(buf))) > 0) {
write(dest, buf, size);
}
close(source);
close(dest);
How can i solve this problem?
You should use fgets to read your file since that will read it line by line.
Because it is not trivial to say how many lines you have in total I would suggest you
use fgets to read the file line by line
skip outputting the first n lines
write the rest to your output files and count the number of lines and remember the length of each.
use ftruncate to truncate off the last m lines.
This should do the trick:
void copy_nm(char * source, char * dest, int n, int m) {
FILE * in = fopen(source, "r");
FILE * out = fopen(dest, "w");
size_t file_length = 0;
size_t line_lengths[m + 1];
memset(line_lengths, 0 , sizeof(line_lengths));
int lengths_iterator = 0;
char buffer[0x400];
while (fgets(buffer, sizeof(buffer), in)) {
size_t length = strlen(buffer);
if (n) { // skip this line
if (buffer[length - 1] == '\n') // only if it is a real line
n--;
continue;
}
fwrite(buffer, length, 1, out);
line_lengths[lengths_iterator] += length;
file_length += length;
if (buffer[length - 1] != '\n') { // line was longer then the buffer
continue;
}
lengths_iterator++;
lengths_iterator %= m+1;
line_lengths[lengths_iterator] = 0;
}
for (lengths_iterator = 0; lengths_iterator < m+1; lengths_iterator++) {
file_length -= line_lengths[lengths_iterator];
}
fseek(out, 0, SEEK_SET); // rewind before truncating
ftruncate(fileno(out), file_length);
}

Resources