Matching JPEG signature - c

I am trying to scan a file looking for 1MB JPEGs that would be stored contiguously. My approach is create a structure to match the first 4 bytes with the JPEG signature and, if true, write the entire 512 buffer to a named file until I find another jpeg signature then I create a new file. The code below creates 2 files, neither of which are readable as the first few bytes are not part of the jpeg signature. Any ideas where I went wrong? I tried variations of my if statement where I test for the signature but no luck thus far.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// One raw byte of file data.
typedef uint8_t BYTE;
// Four consecutive bytes, compared field by field against the JPEG signature in main.
typedef struct
{
BYTE first;
BYTE second;
BYTE third;
BYTE fourth;
}
JPGTEST;
// Reads "card.raw" in 512-byte blocks and copies blocks into numbered .jpg files.
// Returns 2 if the input cannot be opened, 0 otherwise.
// NOTE(review): the signature test uses four bytes read separately from the stream
// (second fread below), not the first four bytes of buffer.
int main(int argc, char* argv[])
{
FILE* inptr = fopen("card.raw", "r");
if (inptr == NULL)
{
printf("Could not open file\n");
return 2;
}
// NOTE(review): outptr is not initialised; it is passed to fclose at the end even if no file was opened
FILE* outptr;
//initialize jpeg count and variable for filename
int count = 0;
char name[8];
//allocate memory
char buffer[512];
JPGTEST myjpg;
// 1 once an output file has been opened
int is_open = 0;
while (fread(&buffer, 512, 1, inptr) != 0)
{
//test first 4 bytes to see if jpeg
// NOTE(review): this consumes the 4 bytes that follow the block just read into buffer
fread(&myjpg, sizeof(JPGTEST), 1, inptr);
//if match, name and write to file
if (myjpg.first == 0xff && myjpg.second == 0xd8 && myjpg.third == 0xff && (myjpg.fourth == 0xe0 || myjpg.fourth == 0xe1))
{
sprintf(name, "%03d.jpg", count);
if (is_open == 0)
{
outptr = fopen(name, "w");
fwrite(buffer, sizeof(buffer),1,outptr);
is_open = 1;
}
// NOTE(review): not an else branch, so this also runs right after the block above
// set is_open to 1: the file is closed and reopened under the same name
if (is_open == 1)
{
fclose(outptr);
outptr = fopen(name, "w");
fwrite(buffer, sizeof(buffer),1,outptr);
count++;
}
}
else
{
// no signature: append the block to the file that is currently open, if any
if (is_open == 1)
{
fwrite(buffer, sizeof(buffer),1,outptr);
}
}
}
fclose(inptr);
fclose(outptr);
return 0;
}

You are opening the files in text mode. You need to open them in binary mode instead:
FILE* inptr = fopen("card.raw", "rb");
outptr = fopen(name, "wb");
Aside from that, you are calling fread() too many times, and just generally not managing the files correctly.
Try something more like this instead:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// One raw byte of file data.
typedef uint8_t BYTE;
// Four consecutive bytes copied from the start of a block and compared against the
// JPEG signature. The pack pragma requests 1-byte packing for the struct.
#pragma pack(push, 1)
typedef struct
{
BYTE first;
BYTE second;
BYTE third;
BYTE fourth;
}
JPGTEST;
#pragma pack(pop)
/*
 * Recover JPEG images stored back to back in "card.raw".
 *
 * The input is read in 512-byte blocks. A block whose first four bytes are
 * ff d8 ff e0 or ff d8 ff e1 starts a new output file (001.jpg, 002.jpg, ...);
 * every block read while an output file is open is appended to it.
 *
 * Returns 0 on success, 2 if the input cannot be opened, 3 if an output
 * file cannot be created.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    FILE* inptr = fopen("card.raw", "rb");
    if (inptr == NULL)
    {
        printf("Could not open file\n");
        return 2;
    }

    FILE* outptr = NULL;
    // number of JPEGs found so far; also used to build the file name
    int count = 0;
    // large enough for any int, so the name cannot overflow once count passes 999
    char name[32];
    char buffer[512];
    JPGTEST myjpg;
    int status = 0;

    while (fread(buffer, sizeof(buffer), 1, inptr) > 0)
    {
        // look at the first 4 bytes of the block that was just read
        memcpy(&myjpg, buffer, sizeof(JPGTEST));

        if ((myjpg.first == 0xff) && (myjpg.second == 0xd8) && (myjpg.third == 0xff) && ((myjpg.fourth == 0xe0) || (myjpg.fourth == 0xe1)))
        {
            // a new image starts here: finish the previous one first
            if (outptr != NULL)
            {
                fclose(outptr);
                outptr = NULL;
            }
            ++count;
            snprintf(name, sizeof(name), "%03d.jpg", count);
            outptr = fopen(name, "wb");
            if (outptr == NULL)
            {
                printf("Could not create %s\n", name);
                status = 3;
                break;
            }
        }

        // blocks before the first signature are skipped because no file is open yet
        if (outptr != NULL)
        {
            fwrite(buffer, sizeof(buffer), 1, outptr);
        }
    }

    fclose(inptr);
    if (outptr != NULL)
    {
        fclose(outptr);
    }
    return status;
}

Replace your second fread inside the loop with the below:
memcpy((void *)&myjpg, (void *) buffer, sizeof(JPGTEST));
And include string.h header file in your code for memcpy function.
What you are doing wrong is that, after reading the first 512 bytes, you never make use of them; instead you read another 4 bytes from the file rather than checking the first 4 bytes of the 512 bytes you have already read.

The problem is you are not resetting the file position indicator after you read into buffer, to do so use, fseek. I.e.:
//test first 4 bytes to see if jpeg
// fseek takes (stream, offset, whence): offset 0 from the start of the file
fseek (inptr, 0, SEEK_SET);
fread(&myjpg, sizeof(JPGTEST), 1, inptr);
That will provide you with a test of the jpg header in myjpg:
sizeof (myjpg): 4
first: ff second: d8 third ff fourth: e0
However, that will also cause logic issues you will have to rework. It is better to simply read the values from buffer as suggested by the other answer.

You can do something like the following to simplify your signature comparisons:
#include ...
#define JPEG_SIGNATURE 0xFFD8
/*
 * Reverse, in place, the order of the `size` bytes starting at `data`.
 *
 * The parameter is declared const for compatibility with existing callers,
 * but the bytes it points to ARE modified, so it must not point at an
 * object that was defined const.
 */
void
reverse_bytes (void const *data, long unsigned size)
{
    char *ptr = (char *)data;
    /* index has the same type as size: no signed/unsigned comparison */
    for (long unsigned i = 0; i < size / 2; i++)
    {
        char temp = ptr[i];
        ptr[i] = ptr[size - i - 1];
        ptr[size - i - 1] = temp;
    }
}
/*
 * Report whether "path/to/image.jpeg" starts with the JPEG signature.
 *
 * The first two bytes of the file are read into an unsigned 16-bit value
 * and byte-swapped before being compared with JPEG_SIGNATURE.
 * NOTE(review): the unconditional swap assumes a little-endian host and a
 * 2-byte short - confirm for the target platform.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int
main()
{
    FILE *fptr = fopen("path/to/image.jpeg", "rb");
    if (fptr == NULL)
    {
        printf("Could not open file!");
        return 1;
    }

    /* unsigned: a signed short holding ff d8 promotes to a negative int
       and could never compare equal to 0xFFD8 in the switch below */
    unsigned short bytes = 0;
    size_t got = fread(&bytes, sizeof(char), sizeof(bytes), fptr);
    fclose(fptr);
    if (got != sizeof(bytes))
    {
        /* file shorter than the signature */
        printf("Unknown format!");
        return 0;
    }

    /* pass the address of the value, not the value itself */
    reverse_bytes(&bytes, sizeof(bytes)); // Refer to [1]
    switch (bytes)
    {
        case JPEG_SIGNATURE:
            printf("JPEG image!");
            break;
        default:
            printf("Unknown format!");
            break;
    }
    return 0;
}
This can be extended to many different formats by adding more cases in the switch statement and a little more work.
This is indeed not a full or proper answer but, I hope it is helpful for others who come through this post!
NOTE: I omitted things like exception handling for brevity!
References:
Why is Fread reading unsigned-int in reverse order?

Related

I cannot figure out why fread will not populate my buffer properly. CS50 recovery

I've torn through this code in gdb for hours. I know that fread is returning the appropriate amount of bytes (512). Even checked the $eax print to confirm. Is there any chance someone may be able to give me a hint at what's wrong with my logic?
I thought that the headers may be offset from the beginning of the file, so I figured on first read going through byte by byte looking for the hex match and setting fseek would do the trick. No such luck. Printing match results in a 0.
#include <stdio.h>
#include <getopt.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
typedef uint8_t BYTE;
typedef enum { false, true } boolean;
// Reads the file named by argv[optind] in 512-byte blocks, looks for a three-byte
// marker, and writes blocks to imgJPG. Prints the number of matches found.
// Returns 1 if the input could not be opened, 3 if imgJPG is NULL, 0 otherwise.
int main(int argc, char *argv[])
{
// get filenames from cml input and open file
char *infile = argv[optind];
// NOTE(review): fileName points at a string literal; the sprintf further down writes through it
char *fileName = "image";
FILE *rawData = fopen(infile, "r");
FILE *imgJPG = fopen(fileName, "w");
int match = 0;
int imgCnt = 0;
// buffer to hold 512 bytes of file data - FAT file system
BYTE *FATbuffer = (BYTE *)malloc(sizeof(BYTE) * 512);
if (rawData == NULL)
{
printf("Error processing file. Exiting...");
return 1;
}
// begin reading raw data and writing it to buffer
while (fread(FATbuffer, sizeof(BYTE), 512, rawData) == 512)
{
// until the first match, scan every offset of the block for the marker
if (imgCnt == 0)
{
for (int c = 0; c < 512; c++)
{
// NOTE(review): for c == 510 and c == 511 the indices c + 1 / c + 2 exceed 511,
// the last index of the 512-byte allocation.
// NOTE(review): JPEG files start with ff d8 ff; the second byte tested here is 0x8d - confirm
if (FATbuffer[c + 0] == 0xff &&
FATbuffer[c + 1] == 0x8d &&
FATbuffer[c + 2] == 0xff)
{
// NOTE(review): SEEK_SET measures c from the start of the file, while c is an index into the current block
fseek(rawData, c, SEEK_SET);
imgCnt++;
match++;
}
}
}
else
{
// after the first match, only the start of each block is tested
if (FATbuffer[0] == 0xff &&
FATbuffer[1] == 0x8d &&
FATbuffer[2] == 0xff &&
imgCnt > 0)
{
sprintf(fileName, "%d.jpg", imgCnt);
// NOTE(review): imgJPG is closed here but no new file is opened before the fwrite below
fclose(imgJPG);
imgCnt++;
}
if (imgJPG == NULL)
{
printf("Error processing file. Exiting...");
return 3;
}
fwrite(FATbuffer, sizeof(BYTE), 512, imgJPG);
}
}
printf("%d\n", match);
// file processed, free memory
free(FATbuffer);
return 0;
}
There are multiple problems in your code:
you should test if a command line argument is available (after handling options, code you probably removed for posting).
files must be opened in binary mode to avoid potential end-of-line translation.
you should delay opening imgJPG until you have found the JPG header.
there is no need to allocate FATbuffer, defining a 512 byte array with automatic storage is fine.
you scan for the JPG signature one block at a time, but you might miss the signature if it spans a 512 byte boundary and you access 2 bytes beyond the end of the FATbuffer array when c is greater than 509.
fseek(rawData, c, SEEK_SET); treats c as an offset from the beginning of the file, but c is an offset from the beginning of the array (the current 512-byte block), so the file position ends up in the wrong place.
sprintf(fileName, "%d.jpg", imgCnt); attempts to overwrite a string constant. This has undefined behavior. You probably meant this:
char fileName[64];
snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
FILE *imgJPG = fopen(fileName, "wb");
Here is a modified version that can extract JPG files embedded anywhere in a data stream:
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extract JPEG images embedded anywhere in a raw data stream.
 *
 * The input file (argv[optind]) is read in 512-byte blocks into a 514-byte
 * window whose first two bytes hold the tail of the previous block, so a
 * 3-byte signature (ff d8 ff) that straddles a block boundary is still found.
 * Each signature starts a new file image1.jpg, image2.jpg, ...; data read
 * before the first signature is discarded.
 *
 * Returns 0 on success, 1 if no input file was given, 2 if the input cannot
 * be opened, 3 if an output file cannot be created.
 */
int main(int argc, char *argv[])
{
    // get filenames from cml input and open file
    if (optind >= argc)
        return 1;
    char *infile = argv[optind];
    FILE *rawData = fopen(infile, "rb"); // open the disk image
    if (rawData == NULL) {
        fprintf(stderr, "Cannot open file %s: %s\n",
                infile, strerror(errno));
        return 2;
    }
    char fileName[64];
    FILE *imgJPG = NULL;
    int imgCnt = 0;
    // buffer to hold 512 bytes of file data - FAT file system
    // add an extra 2 bytes to match signature across sector boundaries
    uint8_t FATbuffer[514];
    // index of the first byte in the window that has not been written yet;
    // the first block has no carried-over bytes, so start past them
    int pos = 2;
    // begin reading raw data into buffer
    while (fread(FATbuffer + 2, 1, 512, rawData) == 512) {
        for (int c = pos; c < 512; c++) {
            // JPEG start-of-image marker ff d8 followed by the ff of the next marker
            if (FATbuffer[c + 0] == 0xff &&
                FATbuffer[c + 1] == 0xd8 &&
                FATbuffer[c + 2] == 0xff) {
                // found signature: skip to a new file
                if (imgJPG) {
                    // write the end of the current image
                    fwrite(FATbuffer + pos, c - pos, 1, imgJPG);
                    fclose(imgJPG);
                }
                pos = c;
                imgCnt++;
                snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
                imgJPG = fopen(fileName, "wb");
                if (imgJPG == NULL) {
                    // report before fclose so errno still describes the fopen failure
                    fprintf(stderr, "Cannot create file %s: %s\n",
                            fileName, strerror(errno));
                    fclose(rawData);
                    return 3;
                }
            }
        }
        if (imgJPG) {
            // write end of block to current image
            fwrite(FATbuffer + pos, 512 - pos, 1, imgJPG);
        }
        // copy the last 2 bytes to test for signature overlapping blocks
        FATbuffer[0] = FATbuffer[512];
        FATbuffer[1] = FATbuffer[513];
        // unwritten file data starts at 0 now.
        pos = 0;
    }
    if (imgJPG) {
        // write the 2 carried-over bytes to the current image
        fwrite(FATbuffer, 2, 1, imgJPG);
        fclose(imgJPG);
    }
    fclose(rawData);
    printf("%d\n", imgCnt != 0);
    printf("%d images extracted\n", imgCnt);
    return 0;
}
If you can assume the signature to be at the start of a sector, the code can be simplified:
#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extract JPEG images from a raw disk image, assuming every image starts on
 * a 512-byte sector boundary.
 *
 * A sector that begins with ff d8 ff starts a new file image1.jpg,
 * image2.jpg, ...; every sector read while a file is open is appended to it.
 *
 * Returns 0 on success, 1 if no input file was given, 2 if the input cannot
 * be opened, 3 if an output file cannot be created.
 */
int main(int argc, char *argv[])
{
    // get filenames from cml input and open file
    if (optind >= argc)
        return 1;
    char *infile = argv[optind];
    FILE *rawData = fopen(infile, "rb"); // open the disk image
    if (rawData == NULL) {
        fprintf(stderr, "Cannot open file %s: %s\n",
                infile, strerror(errno));
        return 2;
    }
    char fileName[64];
    FILE *imgJPG = NULL;
    int imgCnt = 0;
    // buffer to hold 512 bytes of file data - FAT file system
    uint8_t FATbuffer[512];
    // begin reading raw data into buffer
    while (fread(FATbuffer, 1, 512, rawData) == 512) {
        // only the start of the sector is tested: ff d8 (start of image) then ff
        if (FATbuffer[0] == 0xff &&
            FATbuffer[1] == 0xd8 &&
            FATbuffer[2] == 0xff) {
            // found signature: skip to a new file
            if (imgJPG) {
                fclose(imgJPG);
            }
            imgCnt++;
            snprintf(fileName, sizeof fileName, "image%d.jpg", imgCnt);
            imgJPG = fopen(fileName, "wb");
            if (imgJPG == NULL) {
                // report before fclose so errno still describes the fopen failure
                fprintf(stderr, "Cannot create file %s: %s\n",
                        fileName, strerror(errno));
                fclose(rawData);
                return 3;
            }
        }
        if (imgJPG) {
            // write the whole sector to the current image
            fwrite(FATbuffer, 512, 1, imgJPG);
        }
    }
    if (imgJPG) {
        fclose(imgJPG);
    }
    fclose(rawData);
    printf("%d\n", imgCnt != 0);
    printf("%d images extracted\n", imgCnt);
    return 0;
}

C recover compiles but return null for the files read

Trying to solve this problem i keep finding the same errors:
:) recover.c exists.
:) recover.c compiles.
:) handles lack of forensic image
:( recovers 000.jpg correctly
000.jpg not found
:( recovers middle images correctly
001.jpg not found
:( recovers 049.jpg correctly
049.jpg not found
That makes me think that the files are not been read or opened at all and i can't find why because the code compiles.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
// Reads the file named by argv[1] in 512-byte blocks and looks for blocks that begin
// with ff d8 ff eX. Returns 1 on wrong usage, 2 if the name is not "card.raw",
// 0 otherwise.
int main(int argc, char* argv[])
{
//checks if there is an input
if (argc != 2)
{
printf("./recover Usage: ./recover image \n");
return 1;
}
else
{
char* filename = argv[1];
FILE* input = fopen(filename, "r");
// NOTE(review): this compares the file NAME with "card.raw"; the result of fopen (input) is never checked
if (strcmp(filename, "card.raw") != 0)
{
printf("Unable to open: %s\n", filename);
return 2;
}
else
{
//Create buffer
uint8_t buffer[512];
// output stream; NULL until a file has been opened
FILE* output = NULL;
// name buffer: 7 characters such as "000.jpg" plus the terminating NUL
char files[8];
//Files counters
int jpeg_counter = 0;
// read one 512-byte block per iteration
while (fread(buffer, sizeof(buffer), 1, input))
{
//check 4 first bytes of file to see if they contain the jpg signature
if ((buffer[0] == 0xff) && (buffer[1] == 0xd8) && (buffer[2] == 0xff) && ((buffer[3] & 0xf0) == 0xe0))
{
// NOTE(review): output starts as NULL and is only assigned inside this branch,
// so the first signature found takes the else branch below and returns
if (output != NULL) {
// sprintf(char *str, const char *format, ...) and "03i" means 3 digits in format 001, 002...
sprintf(files, "%03i.jpg", jpeg_counter);
// NOTE(review): this opens filename (the input name) for writing, not files (the name just built)
output = fopen(filename, "w");
// write the current block to the output stream
fwrite(buffer, sizeof(buffer), 1, output);
// update counter
jpeg_counter++;
}
else // reached when a signature is found while output is still NULL
{
printf("Could not open file\n");
return 0;
}
}
}
// NOTE(review): output may still be NULL here if no signature was found
fclose(output);
fclose(input);
return 0;
}
}
}
Here is another attempt at a solution. However, it only writes out the first 512 bytes (unlike the original), and I don't think JPEG uses fixed 512-byte chunks, so neither the OP's nor #BarmakShemirani's solution would work as expected. (buffer[3] & 0xf0) == 0xe0 means a variable-sized, application-specific segment, and there is a reference saying that the data is laid out like TIFF. In either case, the OP did not share card.raw, so whatever format is used would be speculation:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define FILENAME_LEN 8
/*
 * Read the file named by argv[1] in 512-byte chunks; each chunk must begin
 * with a JPEG header (ff d8 ff eX) and is written to its own file
 * 000.jpg, 001.jpg, ...
 *
 * Returns 0 when the end of the input is reached cleanly, 1 on wrong usage,
 * 2 if the input cannot be opened, 3 on a chunk without a valid header,
 * 4 if the output name does not fit, 5 if the output cannot be created,
 * 6 on a short write.
 */
int main(int argc, char* argv[]) {
	int r = 0;
	FILE *input = NULL;
	if (argc != 2) {
		printf("./recover Usage: ./recover image \n");
		r = 1;
		goto out;
	}
	char *filename = argv[1];
	input = fopen(filename, "rb");
	// test the stream itself, not the spelling of the file name
	if (!input) {
		printf("Unable to open: %s\n", filename);
		r = 2;
		goto out;
	}
	for(int jpeg_counter = 0; !r; jpeg_counter++) {
		uint8_t buffer[512];
		// element size 1 so that n is the number of BYTES read (0..512)
		size_t n = fread(buffer, 1, sizeof(buffer), input);
		if(n == 0 && feof(input)) {
			// clean end of input: nothing left to process
			break;
		}
		// use an existing library instead?
		if(
			n < 4 ||
			buffer[0] != 0xff ||
			buffer[1] != 0xd8 ||
			buffer[2] != 0xff ||
			(buffer[3] & 0xf0) != 0xe0
		) {
			printf("invalid header\n");
			r = 3;
			break;
		}
		char filename2[FILENAME_LEN];
		if(snprintf(filename2, FILENAME_LEN, "%03i.jpg", jpeg_counter) >= FILENAME_LEN) {
			printf("%s: output filename truncated\n", filename2);
			r = 4;
			break;
		}
		FILE *output = fopen(filename2, "wb");
		if(!output) {
			printf("%s: fopen failed\n", filename2);
			r = 5;
			break;
		}
		// TBD: replace with a loop once we figure out
		// how a given file is terminated.
		// write exactly the n bytes that were read
		if(fwrite(buffer, 1, n, output) != n) {
			printf("%s: write failed\n", filename2);
			r = 6;
		}
		fclose(output);
	}
out:
	if(input) fclose(input);
	return r;
}
uint8_t buffer[512];
fread(buffer, sizeof(buffer), 1, input)
This should be changed to: fread(buffer, 1, sizeof(buffer), input). This way fread will read up to 512 bytes. When it gets to the end of the file, it reads whatever is left, for example 1 byte, and returns that number. Likewise, fwrite should change: it should write the same number of bytes that was read earlier.
Open the file in binary mode.
If fopen fails then stop immediately.
Check the file header only once, not every read loop.
Your condition for checking the file header may have a typo (buffer[3] & 0xf0) == 0xe0. Checking the first 3 bytes should be okay.
/*
 * Copy "input.jpg" to "output.jpg" in chunks, checking once that the file
 * begins with the JPEG signature ff d8 ff.
 *
 * Returns 0 on success, 1 if a file cannot be opened, the header check
 * fails, or a write comes up short.
 */
int main()
{
    FILE* input = fopen("input.jpg", "rb");
    if (!input)
    { perror("input error"); return 1; }

    FILE* output = fopen("output.jpg", "wb");
    if (!output)
    { perror("output error"); fclose(input); return 1; }

    uint8_t buf[1000];
    int check_header = 1;
    int status = 0;

    while (1)
    {
        // element size 1: read_count is the number of bytes actually read
        size_t read_count = fread(buf, 1, sizeof(buf), input);
        if (!read_count)
            break;

        if (check_header)
        {
            // read_count >= 3 guards against inspecting bytes that were never read
            if (read_count >= 3 && buf[0] == 0xff && buf[1] == 0xd8 && buf[2] == 0xff)
                // && buf[3] == 0xe0 ?
            { printf("header is okay\n"); }
            else
            { printf("not okay\n"); status = 1; break; }
            check_header = 0; //don't check header again
        }

        // write back exactly what was read
        if (fwrite(buf, 1, read_count, output) != read_count)
        { perror("output error"); status = 1; break; }
    }

    fclose(input);
    fclose(output);
    return status;
}
Thanks for the help. I tried those and got the same error. What surprisingly worked for me was to refactor the code even more and make use of the counter in the if conditions, as shown below.
/*
 * Recover JPEG images from the raw file named by argv[1].
 *
 * The input is read in 512-byte blocks. A block that begins with
 * ff d8 ff eX starts a new output file (000.jpg, 001.jpg, ...); every block
 * read while an output file is open is appended to it.
 *
 * Returns 0 on success, 1 on wrong usage, 2 if the input cannot be opened,
 * 3 if an output file cannot be created.
 */
int main(int argc, char* argv[])
{
    //checks if there is an input
    if (argc != 2)
    {
        printf(".Usage: Looking for card.raw \n");
        return 1;
    }

    // binary mode: the data must not go through any text translation
    FILE* input = fopen(argv[1], "rb");
    if (input == NULL)
    {
        printf("Could not open file");
        return 2;
    }

    // one 512-byte block of input
    uint8_t buffer[512];
    // current output file; NULL until the first signature is found
    FILE* output = NULL;
    // room for any int, so the name cannot overflow past 999 images
    char files[16];
    // number of images started so far
    int jpeg_counter = 0;

    while (fread(buffer, sizeof(buffer), 1, input))
    {
        //check 4 first bytes of the block to see if they contain the jpg signature
        if ((buffer[0] == 0xff) && (buffer[1] == 0xd8) && (buffer[2] == 0xff) && ((buffer[3] & 0xf0) == 0xe0))
        {
            if (output != NULL)
            {
                fclose(output);
            }
            // "%03i" gives three digits: 000, 001, 002...
            snprintf(files, sizeof(files), "%03i.jpg", jpeg_counter);
            output = fopen(files, "wb");
            if (output == NULL)
            {
                printf("Could not create %s\n", files);
                fclose(input);
                return 3;
            }
            jpeg_counter++;
        }

        // blocks before the first signature are skipped because no file is open yet
        if (output != NULL)
        {
            fwrite(buffer, sizeof(buffer), 1, output);
        }
    }

    // output stays NULL when the input contained no signature at all
    if (output != NULL)
    {
        fclose(output);
    }
    fclose(input);
    return 0;
}

Recovering partial deleted data on memory card using C

I have been working on an assignment that recovers deleted data from the memory card so that the image is viewable. My code compiles and runs fine and produces viewable jpeg images. The program also automatically titles the jpeg image by assigning a number to it. However, when I run it, it is only able produce about half of the images. The other half were unviewable, with random numbers as titles and the error 'unsupported image format' is shown. I'm not too sure what went wrong with my code and I'm fairly new to the intricacies but I suspect it has something to do with memory. Here is my code, any help would be really nice:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
bool function(unsigned char arr[], FILE *pointer);
// Reads the file named by argv[1] in 512-byte blocks. When a block begins with
// ff d8 ff eX, a numbered .jpg file is created and the following blocks are handed
// to function() until it returns false.
// Returns 1 on wrong usage and 2 if the input cannot be opened.
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr,"Usage: ./recover image");
return 1;
}
// opening the memory card
FILE *memorycard = fopen(argv[1], "r");
if (memorycard == NULL)
{
fprintf(stderr, "Could not open %s.\n", argv[1]);
return 2;
}
unsigned char buffer[512];
// index of the next output file
int i = 0;
char filename[8];
while(fread(buffer,1, 512, memorycard) == 512)
{
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
{
//create a new file
sprintf(filename, "%03i.jpg", i);
//write the values currently stored in the buffer to the file
FILE *img = fopen(filename, "w");
fwrite(buffer, sizeof(char), 512, img);
memset (buffer, 0, 512);
//add one to the title of the next jpeg file
i++;
do
{
//read the next chunk of 512 bytes
// NOTE(review): the return value of this fread is not checked, so end of input is not detected here
fread(buffer,1, 512, memorycard);
}
// function() writes the block to img, or closes img when the block starts a new JPEG
// NOTE(review): when function() returns false, the block that starts the next JPEG has
// already been read from memorycard and zeroed; the outer fread then reads the block after it
while (function(buffer, img) == true);
}
}
}
// Handles one 512-byte block held in arr.
// If arr begins with ff d8 ff eX: seeks on pointer, closes it, zeroes arr and returns false.
// Otherwise: writes the 512 bytes to pointer, zeroes arr and returns true.
bool function(unsigned char arr[], FILE *pointer)
{
if (arr[0] == 0xff && arr[1] == 0xd8 && arr[2] == 0xff && (arr[3] & 0xf0) == 0xe0)
{
//rewind by 512 bytes
// NOTE(review): the seek is applied to pointer, which main passes as the OUTPUT file (img),
// not to the memory card being read
fseek(pointer, -512, SEEK_CUR);
//close file being written to
fclose(pointer);
memset (arr, 0, 512);
return false;
}
else
{
//write values into the currently opened file
fwrite(arr, sizeof(char), 512, pointer);
memset(arr, 0, 512);
return true;
}
}
I changed a couple things in your code. In general try to always keep the file "cursor" moving in one direction (try not to seek backwards too much) because this can ruin time complexity especially for large sd cards. This is the modified code I came up with let me know if it works or might clear anything up for you. I haven't actually run this because I don't have anything to test with; however it should work in theory.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
// One 512-byte block viewed as a 4-byte leading tag followed by 508 data bytes.
struct _imgblock{
    // The first 4 bytes of the block as one integer. Its numeric value depends on
    // the host byte order, so it is not used for the signature check below.
    int32_t magicTag;
    char buffer[508];
}__attribute__((packed));
// The same 512 bytes, accessible either as an _imgblock or as raw bytes.
typedef union{
    struct _imgblock imageBlock;
    unsigned char dataBuffer[512];
} Buffer;
// Return true when the block begins with the byte sequence ff d8 ff eX
// (X = any value), i.e. the same test the original code applied to
// buffer[0..3]. The bytes are compared one by one so the result does not
// depend on the host byte order.
bool buffer_starts_with_magic(Buffer* buff){
    const unsigned char *b = buff->dataBuffer;
    return b[0] == 0xff && b[1] == 0xd8 && b[2] == 0xff && (b[3] & 0xf0) == 0xe0;
}
/*
 * Recover JPEG images from the memory-card image named by argv[1].
 *
 * The input is read in 512-byte blocks, always moving forward. A block for
 * which buffer_starts_with_magic() is true starts a new output file
 * (000.jpg, 001.jpg, ...); every block read while an output file is open is
 * appended to it. Blocks before the first signature are discarded.
 *
 * Returns 0 on success, 1 on wrong usage, 2 if the input cannot be opened,
 * 3 if an output file cannot be created.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr,"Usage: ./recover image");
        return 1;
    }
    // opening the memory card (binary mode: no text translation of the data)
    FILE *memorycard = fopen(argv[1], "rb");
    if (memorycard == NULL)
    {
        fprintf(stderr, "Could not open %s.\n", argv[1]);
        return 2;
    }
    Buffer custom_buff;
    int i = 0;
    // room for any int, so the name cannot overflow past 999 images
    char filename[16];
    FILE* openFile = NULL;
    // Start reading 512 byte buffers from the file
    while(fread(&custom_buff,1, sizeof(Buffer), memorycard) == sizeof(Buffer))
    {
        if(buffer_starts_with_magic(&custom_buff)){
            // a new image starts here: end the current file, if any
            if(openFile)
                fclose(openFile);
            snprintf(filename, sizeof filename, "%03i.jpg", i++);
            openFile = fopen(filename,"wb");
            if(openFile == NULL){
                fprintf(stderr, "Could not create %s.\n", filename);
                fclose(memorycard);
                return 3;
            }
        }
        // only write once an image has been started; this keeps the data that
        // precedes the first signature out of 000.jpg
        if(openFile)
            fwrite(custom_buff.dataBuffer,1,512,openFile);
    }
    if(openFile)
        fclose(openFile);
    fclose(memorycard);
    return 0;
}

How can I properly print the bytes of a JPEG file? - CS50 PSET3 Recover

I am trying to use fread on a file containing multiple JPEGs and write the JPEGs into new files, but before I can do that I need to properly look through the file and look for the JPEGs based on their first bytes based on the if statement at the bottom of the code below.
I have not been able to get into the if statement, and have been trying to print out the bytes, but I have been running into issues in printing.
I'm looking to print just the 0 byte of the buffer, but my output is looking like this:
711151a6
cec117f0
7603c9a9
73599166
I'm very new to C and fread, and any help would be appreciated!
Code:
#include <stdio.h>
#include <stdlib.h>
// Reads the file named by argv[1] in 512-byte blocks, prints buffer[0] of each block
// in hexadecimal, and prints a message when the four-element signature test matches.
// Returns 1 on wrong usage or failed allocation, 2 if fp is NULL, 0 otherwise.
int main(int argc, char *argv[])
{
// Check for 2 arguments, the name of the program and the file being read
if (argc != 2)
{
printf("Usage: ./recover image\n");
return 1;
}
else
{
//Open the file
FILE * fp;
fp = fopen(argv[1], "r");
//Get file length
// NOTE(review): fp is used by fseek/ftell here before the NULL check further down
fseek(fp, 0, SEEK_END);
int f_length = ftell(fp);
fseek(fp, 0, SEEK_SET);
// If no file is found then exit
if(fp == NULL)
{
printf("File not found\n");
return 2;
}
// Allocate buffer for fread function
// NOTE(review): the buffer is declared as int *, so buffer[i] below is an int-sized
// element, not a single byte
int *buffer = (int*)malloc(f_length);
if (buffer == NULL)
{
printf("Buffer is null\n");
return 1;
}
// Read through the file, one 512-byte block per iteration
while(fread(buffer, 512, 1, fp) == 1)
{
// prints only element 0 (the loop body runs once)
for (int i = 0; i < 1; i++)
{
printf("%x\n", buffer[i]);
}
if (buffer[0] == 0xff && buffer[1] == 0xd8 && buffer[2] == 0xff && (buffer[3] & 0xf0) == 0xe0)
{
printf("Found a jpg\n");
}
}
// Exit the program
return 0;
}
}
int *buffer is not correct because the intention is to deal with bytes and not ints. If int * is used, then for example, buffer[0] will be the first 4 bytes and not the first byte as intended. Change that to unsigned char *buffer.
So explicitly, that line should be the following (including removing the unnecessary cast):
unsigned char *buffer = malloc(f_length);

Recovering JPEG image from .raw file using C

I am doing a problem set provided by Harvard's online lecture.
I've finally made a solution to recover a set of JPEG images from a file (card.raw).
It seems like the code itself does not throw errors, but it is returning distorted image and I am a little clueless to why it might be happening.
[Link to an image example] https://prnt.sc/q0tb4f
Here's my code
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Reads the file named by argv[1] in 512-byte blocks and writes blocks into numbered
// .jpg files, starting a new file at each block that begins with ff d8 ff eX.
// Returns 1 on wrong usage or if the input cannot be opened.
int main(int argc, char *argv[])
{
//check usage
if (argc != 2)
{
return 1;
}
// open file
FILE* input = fopen(argv[1], "r");
// return error if file does not exist
if (!input)
{
return 1;
// NOTE(review): this printf comes after the return and is never executed
printf("file does not exists");
}
// create an array of 512 bytes
unsigned char bytes[512];
// create count variable
int count = 0;
// buffer for the file name
// NOTE(review): "%03i.jpg" produces at least 7 characters plus a terminating NUL; this array has 7 elements
char filename[7];
// output file; NULL until the first JPEG header is found
FILE* output = NULL;
// repeat until end of input
while (fread(bytes, 1, sizeof(bytes), input) != 0)
{
// read 1 block of 512 bytes at a time
// NOTE(review): second fread per iteration; it overwrites the block read by the loop condition
fread(bytes, 1, sizeof(bytes), input);
// check if beginning of jpeg file
if (bytes[0] == 0xff && bytes[1] == 0xd8 && bytes[2] == 0xff && (bytes[3] & 0xf0) == 0xe0)
{
// if already found a jpeg, close the file
if (count > 0)
{
fclose(output);
}
// name file
sprintf(filename, "%03i.jpg", count);
// open file
output = fopen(filename, "w");
// write file
fwrite(bytes, 1, sizeof(bytes), output);
// increment count
count++;
}
if (output != NULL)
{
// keep writing if jpeg header is already found
// NOTE(review): for a header block this is a second write of the same block (see the fwrite above)
fwrite(bytes, 1, sizeof(bytes), output);
}
}
// NOTE(review): output is still NULL here if no header was found
fclose(output);
fclose(input);
}
My uneducated assumption is unable to see why it might be happening.
I can only imagine that this might be happening from opening and closing files in improper step.
This is the problem:
while (fread(bytes, 1, sizeof(bytes), input) != 0)
{
// read 1 block of 512 bytes at a time
fread(bytes, 1, sizeof(bytes), input);
You're calling fread twice per loop. As a result, the body of the loop only sees the odd-numbered blocks. Remove the second fread.
A second problem (as #SteveFriedl points out) is that the buffer the code uses for the filename is too small.
char filename[7];
sprintf(filename, "%03i.jpg", count);
You need at least 8 bytes for a file name like "123.jpg", because you need room for the NUL terminator. However, note that
"%03i" uses at least 3 characters. It could use more, e.g. if count reaches 1000. So I would declare the buffer as char filename[32]; to avoid any chance of buffer overflow.
You've also got two fwrites when only one is needed:
output = fopen(filename, "w");
// write file
fwrite(bytes, 1, sizeof(bytes), output);
// increment count
count++;
}
if (output != NULL)
{
// keep writing if jpeg header is already found
fwrite(bytes, 1, sizeof(bytes), output);
}
After opening a new file, the code writes the first block, and then writes it again. Remove the first fwrite and let the second fwrite take care of the first block.
Another problem is that the code makes an implicit assumption that if fread doesn't return 0, then it's read a full block. That assumption is OK if the file size is a multiple of the block size, but it's better not to make any assumptions. So the condition in the while loop should be
while (fread(bytes, 1, sizeof(bytes), input) == sizeof(bytes))
here is recover working.
#include <stdio.h>
#include <stdlib.h>
int isAJpg(unsigned char bytes[]);
/*
 * Recover JPEG images from the raw file named by argv[1].
 *
 * The input is read in 512-byte blocks. A block for which isAJpg() returns 1
 * starts a new output file (000.jpg, 001.jpg, ...); every block read while an
 * output file is open is appended to it.
 *
 * Returns 0 on success and 1 on wrong usage, if the input cannot be opened,
 * or if an output file cannot be created.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        return 1;
    }
    // binary mode: the data must not go through any text translation
    FILE *file = fopen(argv[1], "rb");
    if (file == NULL)
    {
        return 1;
    }
    char filename[16];
    int count = 0;
    unsigned char bytes[512];
    // current output file; NULL until the first JPEG header is found
    FILE *output = NULL;
    while (fread(bytes, 512, 1, file) != 0)
    {
        // if it is a jpg
        if (isAJpg(bytes) == 1)
        {
            if (output != NULL)
            {
                fclose(output);
            }
            // name file
            snprintf(filename, sizeof(filename), "%03d.jpg", count);
            // "wb" rather than "a": a rerun must replace the file, not append to it
            output = fopen(filename, "wb");
            if (output == NULL)
            {
                fclose(file);
                return 1;
            }
            count++;
        }
        if (output != NULL)
        {
            // writes to a file
            fwrite(bytes, 512, 1, output);
        }
    }
    //close the files; output stays NULL when no JPEG was found
    if (output != NULL)
    {
        fclose(output);
    }
    fclose(file);
    return 0;
}
// check in it is a jpg
// Returns 1 when bytes[0..3] are ff d8 ff eX (X = any value), otherwise 0.
int isAJpg(unsigned char bytes[])
{
    int fixed_prefix = bytes[0] == 0xff && bytes[1] == 0xd8 && bytes[2] == 0xff;
    int app_marker = (bytes[3] & 0xf0) == 0xe0;
    return (fixed_prefix && app_marker) ? 1 : 0;
}

Resources