reading from a file . C - c

I want to read this symbols with fgetc() ( I cant copy it, here link to file.txt )
This file is 2-byte long and its binary dump is 1A 98.
fgetc() cant read it and return -1. Please help((
for (int k = 0; k < fileSize; k++)
{
buffer[k] = (unsigned char) fgetc(f);
}
Picture of this symbols:
Maybe something with character encoding?

Your file contains 0x1A, which means EOF and reading it will stop reading when the file is opened in text mode.
Try opening your file in binary mode.
Here is a test code:
#include <stdio.h>
int main(void) {
const char *fileName = "codeText.txt";
FILE* fp;
int input;
fp = fopen(fileName, "r");
if (fp==NULL) return 1;
puts("text mode:");
while((input = getc(fp)) != EOF) printf("%02X\n", (unsigned int)input);
fclose(fp);
fp = fopen(fileName, "rb");
if (fp == NULL) return 1;
puts("binary mode:");
while((input = getc(fp)) != EOF) printf("%02X\n", (unsigned int)input);
fclose(fp);
return 0;
}

Related

How to convert a text file from DOS format to UNIX format

I am trying to make a program in C, that reads a text file and replace \r\n with \n to the same file converting the line ending from DOS to UNIX. I use fgetc and treat the file as a binary file. Thanks in advance.
#include <stdio.h>
int main()
{
FILE *fptr = fopen("textfile.txt", "rb+");
if (fptr == NULL)
{
printf("erro ficheiro \n");
return 0;
}
while((ch = fgetc(fptr)) != EOF) {
if(ch == '\r') {
fprintf(fptr,"%c", '\n');
} else {
fprintf(fptr,"%c", ch);
}
}
fclose(fptr);
}
If we assume the file uses a single byte character set, we just need to ignore all the '\r' characters when converting a text file form DOS to UNIX.
We also assume that the size of the file is less than the highest unsigned integer.
The reason we do these assumptions, is to keep the example short.
Be aware that the example below overwrites the original file, as you asked. Normally you shouldn't do this, as you can lose the contents of the original file, if an error occurs.
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
// Return a negative number on failure and 0 on success.
int main()
{
const char* filename = "textfile.txt";
// Get the file size. We assume the filesize is not bigger than UINT_MAX.
struct stat info;
if (stat(filename, &info) != 0)
return -1;
size_t filesize = (size_t)info.st_size;
// Allocate memory for reading the file
char* content = (char*)malloc(filesize);
if (content == NULL)
return -2;
// Open the file for reading
FILE* fptr = fopen(filename, "rb");
if (fptr == NULL)
return -3;
// Read the file and close it - we assume the filesize is not bigger than UINT_MAX.
size_t count = fread(content, filesize, 1, fptr);
fclose(fptr);
if (count != 1)
return -4;
// Remove all '\r' characters
size_t newsize = 0;
for (long i = 0; i < filesize; ++i) {
char ch = content[i];
if (ch != '\r') {
content[newsize] = ch;
++newsize;
}
}
// Test if we found any
if (newsize != filesize) {
// Open the file for writing and truncate it.
FILE* fptr = fopen(filename, "wb");
if (fptr == NULL)
return -5;
// Write the new output to the file. Note that if an error occurs,
// then we will lose the original contents of the file.
if (newsize > 0)
count = fwrite(content, newsize, 1, fptr);
fclose(fptr);
if (newsize > 0 && count != 1)
return -6;
}
// For a console application, we don't need to free the memory allocated
// with malloc(), but normally we should free it.
// Success
return 0;
} // main()
To only remove '\r' followed by '\n' replace the loop with this loop:
// Remove all '\r' characters followed by a '\n' character
size_t newsize = 0;
for (long i = 0; i < filesize; ++i) {
char ch = content[i];
char ch2 = (i < filesize - 1) ? content[i + 1] : 0;
if (ch == '\r' && ch2 == '\n') {
ch = '\n';
++i;
}
content[newsize++] = ch;
}

Basic File IO in C Produces all a's

I am using CodeBlocks on Windows to compile.
Why the program gives me this answer? Why there are so much as and don't get the answer 123456abcdef?
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE *fp;
char s[100] = "abcdef";
char c1 = '0';
int i = 0;
fp = fopen("ot.txt", "w");
if (fp == NULL) {
printf("file open error");
exit(0);
}
while (s[i] != '\0') {
fputc(s[i], fp);
i++;
printf("%d", i);
}
while (c1 != EOF) {
c1 = fgetc(fp);
putchar(c1);
}
fclose(fp);
}
There are multiple problems in your code:
c1 should be defined with type int to accommodate for all values returned by fgetc(). a char cannot unambiguously store EOF.
You should open the file in write+update mode "w+"
You should rewind the stream pointer before reading back from it for 2 reasons: a seek operation is required between read and write operations and you want to read the characters from the start of the file.
You need to test for EOF after reading a byte with fgetc(), otherwise you will output the EOF converted to unsigned char to stdout before exiting the loop.
It is good style to return 0; from main() to indicate success and non-zero to indicate failure.
Here is a corrected version:
#include <stdio.h>
int main(void) {
FILE *fp;
char s[] = "abcdef";
int i, c;
fp = fopen("ot.txt", "w+");
if (fp == NULL) {
printf("file open error\n");
return 1;
}
i = 0;
while (s[i] != '\0') {
fputc(s[i], fp);
i++;
printf("%d", i);
}
rewind(fp);
while ((c1 = fgetc(fp)) != EOF) {
putchar(c1);
}
printf("\n");
fclose(fp);
return 0;
}

C file to check if file is empty or contains ASCII text

I'm trying to write a program which should be able to take a file as input in terminal and then determine if the file is empty or written in ASCII text. But I keep getting segmentation fault 11.
My code is as follows:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
unsigned char c;
int size;
FILE *file = fopen(argv[1], "r");
fseek(&c, 0, SEEK_END);
size = ftell(file);
if (size == 0)
{
printf("file is empty\n");
}
fclose(file);
FILE *file = fopen(argv[1], "r");
c = fgetc(file);
if (c != EOF && c <= 127)
{
printf("ASCII\n");
}
fclose(file);
}
Any ideas as to why?
1] fseek doesnt have first argument unsgined char*, but FILE*.
fseek(file, 0, SEEK_END);
2] You shouldn't use unsigned char / char for checking for EOF, use int for sure.
3] Working and simplier code
int main(int argc, char *argv[])
{
if (argc < 2)
{
// err we havent filename
return 1;
}
int c;
FILE *file = fopen(argv[1], "r");
if (file == NULL)
{
// err failed to open file
return 1;
}
c = fgetc(file);
if (c == EOF)
{
printf("empty\n");
fclose(file);
return 0;
}
else
{
ungetc(c, file);
}
while ((c = fgetc(file)) != EOF)
{
if (c < 0 || c > 127)
{
// not ascii
return 1;
}
}
printf("ascii\n");
fclose(file);
return 0;
}
fseek(&c, 0, SEEK_END);
Here you are supposed to pass file descriptor, like
fseek(file, 0, SEEK_END);
fseek takes a FILE* as a parameter and you are giving it a unsigned char* - change &c to file.

fgetc misses bytes while reading a file

I use the function fgetc to read each byte of a file, and then write it with printf.
I just noticed that sometimes, fgetc just miss some bytes, when I compare my result with a hex editor.
For example, the first mistake starts around the 118th byte, and a lot of other mistakes randomly ...
Somebody ever experienced this?
This is the code (Windows)
char main(int argc, char* argv[]) {
FILE* fdIn;
FILE* fdOut;
long size = 0;
long i = 0;
char c = 0;
if (argc == 3) {
if ((fdIn = fopen(argv[1], "rt")) == NULL) {
printf("FAIL\n");
return 0;
}
if ((fdOut = fopen(argv[2], "w+")) == NULL) {
printf("FAIL\n");
return 0;
}
fseek(fdIn, 0L, SEEK_END);
size = ftell(fdIn);
fseek(fdIn, 0L, 0);
fprintf(fdOut, "unsigned char shellcode[%ld] = {", size);
while (i < size) {
c = fgetc(fdIn);
if (!(i % 16))
fprintf(fdOut, "\n\t");
fprintf(fdOut, "0x%02X", (unsigned char)c);
if (i != size - 1)
fprintf(fdOut, ", ");
i++;
}
fprintf(fdOut, "\n};\n");
fclose(fdIn);
fclose(fdOut);
printf("SUCCESS");
system("PAUSE");
}
return 0;
}
Open the file in binary mode.
// if ((fdIn = fopen((char*)argv[1], "rt")) == NULL) {
// >.<
if ((fdIn = fopen((char*)argv[1], "rb")) == NULL) {
In text mode, and likely a Windows based machine given the "rt", a '\r', '\n' pair is certainly translated into '\n'. IAC, no translations are needed for OP's goal of a hex dump.
2nd issue: fgetc() returns an int in the range of unsigned char or EOF. Use type int to distinguish EOF from all data input.
// char c = 0;
int c = 0;
...
c = fgetc(fdIn);
// also add
if (c == EOF) break;

Number of Lines Mystery?

My goal is to implement a function that calculates the number of lines in a file. And empty file is considered to have no lines. If the last line of the given file is not empty, it should be counted as a line despite not ending with a newline character.
I've come up with the following code:
int linecount(const char *filename)
{
FILE *f = fopen(filename, "r");
if(!f)
return -1;
int lines = 0;
int c = 0;
int n = 0;
while((c = fgetc(f)) != EOF){
if(c == '\n')
lines++;
n++;
}
if(n==0)
return 0; //return 0 if the file is empty
if(c!='\n' && !isspace(c))
lines++; //count the last line if it's not empty
fclose(f);
return lines;
}
However, even after playing with it for over an hour I can't figure out why its return value lines is one too large in some cases...
You were close, here how you could do it:
int linecount(const char *filename) {
FILE *f = fopen(filename, "r");
if (!f)
return -1;
int lines = 0;
int c = 0;
int n = 0;
int read_line = 0;
while ((c = fgetc(f)) != EOF) {
read_line = 1;
if (c == '\n') {
lines++;
read_line = 0;
}
n++;
}
if (n == 0)
return 0; //return 0 if the file is empty
if(read_line)
lines++;
fclose(f);
return lines;
}
The idea is that we want to know if we started reading a line AND if we met a newline, at end of this line. So, we use another variable, called read_line and we use it as a flag.
We set it to 1 (true) if we just started reading a line and we set it to 0 (false) if we just met a newline (end of the line).
Now, if we have something like:
1[newline]
2[newline]
3
we will be OK, since we need to check if read_line after we read the file. Is so, we have to increment our line counter by one.
This is also OK:
1[newline]
2[newline]
3[newline]
since we saw three newlines and the read_line is 0 after we read the file.
Same goes for this case:
1[newline]
2[newline]
3[newline]
[nothing here]
since our flag is going to be equal to 0 after reading the file, since the 3rd newline should set it to 0 and we never actually enter the 4th line in our loop, since there is nothing to read.
With your previous implementation, as stated in the comments, this line:
if(c!='\n' && !isspace(c))
would be executed with c being equal to EOF.
Or you could just use fgets() and you are done. Check the example:
#include <stdio.h>
#include <string.h>
#define bufSize 1024
int main(int argc, char *argv[])
{
FILE *fp;
char buf[bufSize];
if ((fp = fopen("test.txt", "rb")) == NULL)
{ /* Open source file. */
perror("fopen source-file");
return 1;
}
int lines = 0;
while (fgets(buf, sizeof(buf), fp) != NULL)
{ /* While we don't reach the end of source. */
/* Read characters from source file to fill buffer. */
/* fgets will stop when it finds a newline. */
lines++;
}
printf("lines = %d\n", lines);
fclose(fp);
return 0;
}
Modify sample
int linecount(const char *filename)
{
FILE *f = fopen(filename, "r");
if(!f)
return -1;
int lines = 0;
int c = 0;
int flag = 1;
while((c = fgetc(f)) != EOF){
if(flag = (c == '\n'))
lines++;
}
if(!flag)
lines++; //count the last line if it's not empty
fclose(f);
return lines;
}
A simple solution can be
int linecount(const char *filename)
{
FILE *stream;
char *line = NULL;
size_t len = 0;
ssize_t read;
int numOfLines = 0;
stream = fopen(filename, "r");
if (stream == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, stream)) != -1) {
numOfLines++;
}
free(line);
fclose(stream);
return numOfLines;
}

Resources