End of File detection - c

I’m a very novice programmer and I’ve encountered an issue whose nature I don’t understand, whilst working on a problem set of the excellent cs50 course. I have implemented a program to recover JPEG pictures from an image of a memory card and am implementing a break at End of File as follows:
if(file > 1)
{
if (fread(&buffer, 1, 512, in_pointer) != 512)
{
free(filename);
return 0;
}
else
fseek(in_pointer, -512, SEEK_CUR);
}
(the pictures are filling up the card in 512 byte blocks). When I first implemented this it broke my first picture (it was recognizable but distorted) so I excluded it by means of the first if statement. Now however the middle files of the set are slightly off– they still open as Jpegs but I can’t get their thumbnails to work. My hypothesis is that I am corrupting the JPEG file format header. The beginning (including first and last images of the set work perfectly).
My questions are:
What is an elegant way to implement an EOF break since my getto solution is causing trouble?
What is the likely nature of the problem I’ve created (in layman’s terms)?
Thank you very much,
Tikhon
ps here is the whole thing
#include <cs50.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
int main(int argc, char *argv[])
{
// ensure proper usage
if (argc != 2)
{
fprintf(stderr, "enter exactly two command line arguments: ./recover and destination of disc to scan\n");
return 1;
}
//name the file
char *infile = argv[1];
//open card file and ensure proper format
FILE *in_pointer = fopen(infile, "r");
if (in_pointer == NULL)
{
fprintf(stderr, "could not open %s\n", infile);
return 2;
}
typedef uint8_t BYTE;
BYTE buffer[512];
bool new_jpeg = false;
int block = 0;
int file = 0;
char *filename = malloc(3);
//sprintf(filename, "%03i.jpg",1);
do
{
//read a 512 block of a jpeg
fread(&buffer, 512, 1, in_pointer);
//check for new jpeg
if (buffer[0] == 0xff &&
buffer[1] == 0xd8 &&
buffer[2] == 0xff &&
(buffer[3] & 0xf0) == 0xe0) // took me a while to figure this out
{
new_jpeg = true;
//printf("jpeg found, block %i\n", block);
}
block++;
} while(new_jpeg == false);
do
{
//set name of file to write to
sprintf(filename, "%03i.jpg",file);
file++;
new_jpeg = false;
// open output file
FILE *img = fopen(filename, "w");
if (img == NULL)
{
fprintf(stderr, "Could not create %s.\n", filename);
return 3;
}
//add blocks to file while before we reach the nea JPEG.
do
{
fwrite(&buffer, 1, 512, img);
//read the next block
fread(&buffer, 1, 512, in_pointer);
//There MUST be a better way... Anyhow this checks for end of file but backtracks becouse the act of checking moved the file coursor forward...
if(file > 1)
{
if (fread(&buffer, 1, 512, in_pointer) != 512)
{
free(filename);
return 0;
}
else
fseek(in_pointer, -512, SEEK_CUR);
}
block++; //we are reading off teh next block
if (buffer[0] == 0xff &&
buffer[1] == 0xd8 &&
buffer[2] == 0xff &&
(buffer[3] & 0xf0) == 0xe0) // took me a while to figure this out
{
new_jpeg = true;
//printf("jpeg %i found, block %i\n", file, block);
}
}while(new_jpeg == false);
}while(!feof(in_pointer));
free(filename);
//ran valgrind no probs detected.
}

OK, I fixed it.
Instead of having a whole separate section of code to check eof I killed two birds in one stone and fread the file WHILE checking for eof:
//read the next block
int k = fread(&buffer, 1, 512, in_pointer);
if(k != 512)
{
free(filename);
return 0;
}
I still have no idea why my previous method didn't work, I would be extremly grateful for suggestions...

Related

CS50 PSET4 RECOVER: fread() not populating buffer array

Hello and thank you for taking a look.
I'm working through CS50x and am struggling with Recover. The aim is to open a .raw file, read its contents in 512-byte blocks, check the initial four bytes for .jpg headers, and then write each JPEG data to a new file.
I have a body of code written, and the file compiles. The debugger tells me that my buffer[512] variable remains empty/zeroed. This then means the program skips if/else conditions and the program exits.
While my logic within the While loop may be flawed, I haven't been able to step far enough into the program to consider this.
I looked up my issue before posting. Some sources like to use fread(buffer, 512, 1, input), but CS50 itself uses fread(buffer, 1, 512, input). Also, when initialising the filename, I have tried both char *filename = malloc(8 * sizeof(char)); and char filename[8];. For both lines I have tried each method and am still missing something.
My code is below. Thank you in advance for your time.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
typedef uint8_t BYTE;
int main(int argc, char *argv[])
{
// First check the number of arguments is correct.
if (argc != 2)
{
printf("Correct Usage: ./recover.c [filename]\n");
return 1;
}
// Open the file.
FILE *inputFile = fopen(argv[1], "r");
if (inputFile == NULL)
{
printf("File not found.\n");
return 1;
}
// Create counter of number of files.
int counter = 0;
// Create filename variable
char *filename = malloc(8 * sizeof(char)); // 7 + 1 for \0
// Create a 512-size array buffer.
BYTE buffer[512];
// Initialise img file for scope access.
FILE *img = NULL;
while (fread(buffer, sizeof(BYTE), 512, inputFile))
{
// If start of new JPEG:
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
{
if (counter == 0) // If the FIRST JPEG
{
// Make new file:
sprintf(filename, "%03i.jpg", counter);
img = fopen(filename, "w");
fwrite(buffer, sizeof(BYTE), 512, img);
}
else // If not the first JPEG
{
fclose(img); // Close previous file.
counter++;
// Make new file:
sprintf(filename, "%03i.jpg", counter); // Update filename.
img = fopen(filename, "w");
fwrite(buffer, sizeof(BYTE), 512, img);
}
}
else if (counter > 0) // buffer is continuation of previous.
{
fwrite(buffer, sizeof(BYTE), 512, img);
}
else
{
printf("I exited with no images.\n");
return 2;
}
}
free(filename);
fclose(img);
fclose(inputFile);
return 0;
}
The program exits (returns) after the first line in the raw file is read (assuming it's not a jpeg header, which is the case with the distro raw file). else if (counter > 0) evaluates to false, so the else branch executes.
Thank you everyone for your response. The issue is now fixed!
#DinoCoderSaurus (sorry, can't upvote yet) prompted me to realise that I had assumed (wrongly) that the data in the raw file would immediately begin with a .jpeg header (in fact it looks like the data begins with a hidden message, "surprise").
The Else condition was initially put there to avoid errors but of course it was prematurely exiting the While loop. The buffer was populated correctly after a couple of loops.
I then encountered the second problem (pointed out by #Some_programmer_dude) that counter++; was in the wrong place, which meant after the first new JPEG, no others could be written.
I'll also take your comments about best practice into consideration.

CS50 Problem set 4 Recover not recovering images

I could do with some advice for this, to me this makes sense logically however when I run check50 only one of the images are recovered. I've looked through the code multiple times so I don't think its a syntax error so it must be some error with the logic. Any tips would be greatly appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <cs50.h>
typedef uint8_t BYTE;
bool is_jpeg_header(BYTE buffer[]);
int main(int argc, char *argv[])
{
// Check if command line argument is valid
if (argc != 2)
{
printf("Usage: ./recover image\n");
return 1;
}
// Open memory card files
char* mem_card = argv[1];
FILE* inptr = fopen(mem_card, "r");
if (inptr == NULL)
{
printf("File not found/n");
return 1;
}
BYTE buffer[512];
bool found_first_jpeg = false;
int image_count = 0;
char filename[8];
FILE* outptr = NULL;
while (!feof(inptr) && fread(buffer, sizeof(buffer), 1, inptr) == true)
{
// Check if we have found a JPEG
if (is_jpeg_header(buffer) == true)
{
// Check if this is the first JPEG
if (found_first_jpeg == false)
{
found_first_jpeg = true;
sprintf(filename, "%03i.jpg", image_count);
outptr = fopen(filename, "w");
fwrite(buffer, sizeof(buffer), 1, outptr);
image_count++;
}
// If this isn't the first JPEG, close file current JPEG and open new one for new JPEG
else
{
fclose(outptr);
image_count++;
sprintf(filename, "%03i.jpg", image_count);
outptr = fopen(filename, "w");
}
}
// If we haven't found a new JPEG:
else if (is_jpeg_header(buffer) == false)
{
// Continue reading file if we have not found first JPEG
if (found_first_jpeg == false)
{
continue;
}
// Continue writing current JPEG into current file
else
{
fwrite(buffer, sizeof(buffer), 1, outptr);
}
}
}
fclose(inptr);
fclose(outptr);
return 0;
}
bool is_jpeg_header(BYTE buffer[])
{
if (((buffer[0] == 0xff) && (buffer [1] == 0xd8) && (buffer[2] == 0xff) && ((buffer[3] & 0xf0) == 0xe0)))
{
return true;
}
return false;
}
This is the error code I receive from check50
:) recover.c exists.
:) recover.c compiles.
:) handles lack of forensic image
:) recovers 000.jpg correctly
:( recovers middle images correctly
001.jpg not found
:( recovers 049.jpg correctly
recovered image does not match
One bug I see is that filename is too short: you haven't left any room for the terminating zero. That's undefined behavior, and likely your source of trouble.
But the logic overall is very convoluted for what amounts to a simple problem. Here's how I'd write it. Since you're not in general checking for errors, I've left it that way - it's likely OK for this test assignment, although I've not read it. It does help to return different error codes for different errors though - it'd have really helped with the original typo!
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
typedef uint8_t bool;
static const bool true = 1;
static const bool false = 0;
bool is_jpeg_header(const uint8_t buffer[]);
int main(int argc, char *argv[])
{
// Check if command line argument is valid
if (argc != 2)
{
printf("Usage: ./recover image\n");
return 1;
}
// Open the memory card image
char* mem_card = argv[1];
FILE* infile = fopen(mem_card, "r");
if (!infile)
{
printf("File not found/n");
return 2;
}
uint8_t buffer[512];
int image_count = 0;
char filename[9];
FILE* outfile = NULL;
while (!feof(infile) && fread(buffer, sizeof(buffer), 1, infile) == 1)
{
// Check if we have found a JPEG
if (is_jpeg_header(buffer))
{
// If we're already writing output - close it
if (outfile)
fclose(outfile);
sprintf(filename, "%03i.jpg", image_count);
outfile = fopen(filename, "w");
image_count ++;
}
// Write the output if we're ready to write
if (outfile)
fwrite(buffer, sizeof(buffer), 1, outfile);
}
fclose(infile);
fclose(outfile);
return 0;
}
bool is_jpeg_header(const uint8_t buffer[])
{
return
buffer[0] == 0xff
&& buffer[1] == 0xd8
&& buffer[2] == 0xff
&& (buffer[3] & 0xf0) == 0xe0;
}
why while(!foef() is always wrong
regarding:
printf("File not found/n");
Error messages should be output to stderr, not stdout.
When the error indication comes from a C library function should also output to stderr, the text reason the system thinks the error occurred. The function:
perror( "your error msg" );
is made for this purpose.
regarding:
printf("Usage: ./recover image\n");
1) this should be to stderr, not stdout. 2) don't hardcode the executable name. Suggest:
fprintf( stderr, "Usage: %s imagefile\n". argv[0] );
regarding:
while (!feof(inptr) && fread(buffer, sizeof(buffer), 1, inptr) == true)
1) true and false are defined in stdbool.h so that header file needs to be included.
2) fread() returns the number of items read. (which is also the third parameter, (and remembering the prior statement about while( !foef() ) so the statement would be much better written as:
while ( fread(buffer, sizeof(buffer), 1, inptr) == 1)
which catches EOF and partial reads and I/O errors.
regarding;
outptr = fopen(filename, "w");
The success/failure of the call to fopen() is not under the control of your program, Therefore, always check (!=NULL) the returned value to assure the operation was successful.
regarding:
if (is_jpeg_header(buffer) == false)
{ // Continue reading file if we have not found first JPEG
if (found_first_jpeg == false)
{
continue;
} // Continue writing current JPEG into current file
else
This code can be completely removed
the posted code fails to close the current output file after the second file is started.
the posted code always reads sizeof(buffer) bytes (assuming no errors) but there is no guarantee that each image data is exactly a multiple of sizeof(buffer) in length so it can miss the encounter with the next image AND can result in part of the header, etc data from the next image being written into the current output file.
Please post the function:
is_jpeg_header(buffer)
as it is unlikely to correct the problems listed above.

Need advice pset4 recover. Tried for a week and still getting segmentation fault

Output:
:( recovers 000.jpg correctly
failed to execute program due to segmentation fault
:( recovers middle images correctly
failed to execute program due to segmentation fault
:( recovers 049.jpg correctly
failed to execute program due to segmentation fault
Code:
#include <stdio.h>
#include <stdint.h>
int main(int argc, char *argv[])
{
if (argc != 2) //to make sure that accept exactly one command-line argument
{
printf("Usage: ./recover key\n");
return 1;
}
FILE *infile = fopen(argv[1], "r"); //open the file card.raw and creating a new file called f in read format
if (infile == NULL)//if file cannot open then print below if can open just continue
{
printf("Cannot open file\n");
return 2;
}
FILE *img; //img is the output
int jpeg_counter = 0; // to count the no. of jpeg files
uint8_t buffer[512]; //cos 512 bytes and the buffer is the temporary storage
char filename[8];
while (fread(buffer, sizeof(buffer), 1, infile) == 512)
//continue doing this loop if the while conditions are true. to repeat until end of card.like while the file you reading is true,
{
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
//removing last 4 bits of the 8 bits, only looking at they first 4 which is e. setting all to 0
//if start of new jpeg with above conditions
{
if (jpeg_counter != 0) // telling them that you previously found jpeg
{
fclose(img);//else if never find before, tell them now that you have found it by making it true
}
sprintf(filename, "%03i.jpg", jpeg_counter); //%03i means print an integer with 3 digits
jpeg_counter ++;
img = fopen(filename , "w"); //open the new file w for writting
if (img == NULL) //see if can remove this
return 3;
fwrite(buffer, sizeof(buffer), 1, img);// writing new output file
}
if (jpeg_counter != 0)
fwrite(buffer, sizeof(buffer), 1, img);
}
fclose(infile);
fclose(img);
return 0;
}
the segfault is cased by fclose(img) and img is an invalid FILE pointer. The problem is your while loop condition is never true and the loop is never taken. Your fread will never return 512, it returns 1 on a success read. I have fixed the loop condition for you and added some printfs to print out more information so you have a better understanding of what happens. Here is a link of the fixed code in our cloud IDE, you can use it to debug segfault in the future.

CS50: I can't open the JPG's my image recovery program creates

As I said in the title I can't open the JPG's my image recovery program creates. The purpose of this program is to scan the argument infile for JPEGs a 512 byte block at a time. If the block signals the start of a new file the program should close the last outfile, open a new outfile and start writing into it. If the data in the block is not the start of a new file, the program should continue writing into the current outfile. My program creates 50 files which is the amount of photos in the infile. However, when I try to open them I am told "Invalid or Unsupported Image Format".
Could anyone offer any advice as I am a little stumped?
Here is my code
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
typedef uint8_t BYTE;
BYTE block[512];
int main(int argc, char *argv[])
{
if (argc != 2)
{
printf("Usage: ./recover filename\n");
return 1;
}
char *card = argv[1];
FILE *raw_data = fopen(card, "r");
if (raw_data == NULL)
{
printf("Couldn't open file.\n");
return 1;
}
char file_name[8];
BYTE buffer[512];
int counter = 0;
FILE *image = NULL;
while (fread(buffer, sizeof(block), 1, raw_data) != 0)
{
if (counter == 0)
{
sprintf(file_name, "%03i.jpg", counter);
image = fopen(file_name, "w");
fwrite(&buffer, sizeof(block), 1, image);
counter++;
}
else if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
{
fclose(image);
sprintf(file_name, "%03i.jpg", counter);
image = fopen(file_name, "w");
counter++;
}
else
{
fwrite(&buffer, sizeof(block), 1, image);
}
}
fclose(raw_data);
fclose(image);
return 0;
}
The answer of your question is in this else if statement.
else if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
{
fclose(image);
sprintf(file_name, "%03i.jpg", counter);
image = fopen(file_name, "w");
counter++;
}
When there is a jpeg signature in the beginning of each 512 byte block, this statement is executed. You close the image file if it is open before. You create a new image file, and you open it for writing. But then?? You did not write the buffer on image file where it has jpeg signature.. Bam! Your jpeg file is not supported! How can a computer know if it is jpeg without a jpeg signature?
There are other problems in while loop. Even if you fixed the main issue, you first jpeg file will be just garbage.. Why? Look at this if statement below. It is writing the very first 512 byte block of raw file without any check if it carries jpeg signature or not.
if (counter == 0)
{
sprintf(file_name, "%03i.jpg", counter);
image = fopen(file_name, "w");
fwrite(&buffer, sizeof(block), 1, image);
counter++;
}
The very first if statement in while loop should rather be to check if the 512 byte memory block has the jpeg signature or not. A pseudo code for a new design would help you. Because you probably want to fix other issues by yourself.
//If the buffer starts with the magic sequence found in the original code.
//If there is already an image found, then close. (Check if counter is not equal to zero.
//Create a new jpeg file.
//Open that new jpeg file.
//Write buffer on new jpeg file. This one should be outside of the main if statement.
Good luck with solving other issues. You are pretty close.

Pset4 (cs50) recover does not work properly. It compiles, but does not recover more than 2 jpegs. Is something wrong with checking for JPEG signature?

I am learning how to code and I have no experience with that at all. I've successful got to PSET4 and stuck on recover. I've read everything online about this problem and i found out that many people have similar code as I do and it works. Does not work for me whatsoever. Please have a look and give me a hint what did I do wrong and how to correct it.
Here is everything about the pset4 recover i downloaded their card.raw from here card.raw
/** recovering JPEG files from a memory card
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
typedef uint8_t BYTE;
int main(int argc, char* argv[])
{
// ensure proper usage
if (argc != 2)
{
fprintf(stderr,
"Usage: ./recover infile (the name of a forensic image from which to recover JPEGs)\n");
return 1;
}
// open input file (forensic image)
FILE* inptr = fopen(argv[1], "r");
if (inptr == NULL)
{
fprintf(stderr, "Could not open %s.\n", argv[1]);
return 2;
}
FILE* outptr = NULL;
// create a pointer array of 512 elements to store 512 bytes from the memory card
BYTE* buffer = malloc(sizeof(BYTE) * 512);
if (buffer == NULL)
{
return 3;
}
// count amount of jpeg files found
int jpeg = 0;
// string for a file name using sprintf
char filename[8] = { 0 };
// read memory card untill the end of file
while (fread(buffer, sizeof(BYTE) * 512, 1, inptr) != 0)
{
// check if jpeg is found
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff
&& (buffer[3] >= 0xe0 || buffer[3] <= 0xef))
{
if (jpeg > 0)
{
fclose(outptr);
}
sprintf(filename, "%03d.JPEG", jpeg);
outptr = fopen(filename, "w");
jpeg++;
}
if (jpeg > 0)
{
fwrite(buffer, sizeof(BYTE) * 512, 1, outptr);
}
}
// free memory
free(buffer);
// close filename
fclose(outptr);
// close input file (forensic image)
fclose(inptr);
return 0;
}
The main problem is that you invoke undefined behavior because filename is not enough big. sprintf() need be 9 and 17 bytes with your code but you only has 8. So you have a buffer overflow.
Just change:
char filename[8] = { 0 };
to
char filename[17] = { 0 };
Because, you use an int, this value is implemented defined but in many system has an int with 32 bits. So the value possible are between -2^31 and 2^31 - 1 that make a maximum of 11 chars (-2147483648). We add the number of chars in ".JPEG", 5. We have 16 but you forget the null terminate byte of a c-string. So we are 17 maximum.
Modern compiler warning you: gcc version 7.1.1 20170516 (GCC):
In function ‘main’:
warning: ‘sprintf’ writing a terminating nul past the end of the destination [-Wformat-overflow ]
sprintf(filename, "%03d.JPEG", jpeg++);
^
note: ‘sprintf’ output between 9 and 17 bytes into a destination of size 8
sprintf(filename, "%03d.JPEG", jpeg++);
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Plus, your typedef is useless because a char world be always a byte in C. More than that you don't need a byte but an octet so like char, uint8_t would be always an octet in C. So you don't need typedef.
Again one thing, you allocate your buffer but it's useless because your buffer has a constant size. So just create an array is more simple.
#include <stdint.h>
#include <stdio.h>
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "Usage: ./recover infile (the name of a forensic image "
"from which to recover JPEGs)\n");
return 1;
}
FILE *inptr = fopen(argv[1], "r");
if (inptr == NULL) {
fprintf(stderr, "Could not open %s.\n", argv[1]);
return 2;
}
FILE *outptr = NULL;
uint8_t buffer[512];
size_t const buffer_size = sizeof buffer / sizeof *buffer;
size_t jpeg = 0;
while (fread(buffer, sizeof *buffer, buffer_size, inptr) == buffer_size) {
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff &&
buffer[3] == 0xe0) {
if (outptr != NULL) {
fclose(outptr);
}
char filename[26];
sprintf(filename, "%03zu.JPEG", jpeg++);
outptr = fopen(filename, "w");
}
if (outptr != NULL) {
fwrite(buffer, sizeof *buffer, buffer_size, outptr);
}
}
if (outptr != NULL) {
fwrite(buffer, sizeof *buffer, buffer_size, outptr);
}
if (outptr != NULL) {
fclose(outptr);
}
fclose(inptr);
}
Note: This example is clearly not perfect, this will be better to make a true parser for jpeg file to have a better control flow. Here we suppose that all gonna be right.
how do you know that an instance of a JPEG image will always end with '\n'? Or better, how do you know that a JPEG image will be an exact multiple of 512?
You dont know.
So the posted code needs to calculate the actual value OR use some method to have the last call to fread() for any specific JPEG instance, to stop reading at the end of that image,
Then the check for the ID bytes of the next JPEG image will find the next image.
Otherwise, the start of the next image is already written to the prior output file and the check for a new image will fail.
In general this will result in the last created file containing more than one image.
This link: 'https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format' is a web page that describes the format of a JPEG file.
On every digital camera that I have used, the SD card has a directory of all the files.
Suggest using that directory and the info in the linked web page to find each JPEG image and to determine when the end of that image has been encountered. (I.E. the 0xFF 0xD9)

Resources