Using file pointers correctly - c

I have a basic question about file pointers. In the code below i have a while loop followed by a for loop. The for loop only will show the line count unless i fopen the file again - is that normal? and if so should i fclose it after the while loop beforehand? There's probably some "rewind" function that i'm unaware of, so my whole approach might be wrong. I realize both the while loop and for can be combined, but this my question is about the fopen, fclose and using the data again from fopen.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAXLEN 200
enum { MAXLINES = 200 };
char lpath[MAXLINES][BUFSIZ];
int main(int argc, char **argv) {
char c;
int l = 0, n = 0, i = 0, count = 0;
char lines[MAXLINES][BUFSIZ];
FILE *fp = fopen(argv[1], "r");
if (fp == 0) {
fprintf(stderr, "failed to open input.txt\n");
exit(1);
}
while (l < MAXLINES && fgets(lines[l], sizeof(lines[0]), fp)) {
lines[l][strlen(lines[l])-1] = '\0';
puts(lines[l]);
l++;
}
// fp = fopen(argv[1], "r"); // below won't output unless fopen again
for (c = getc(fp); c != EOF; c = getc(fp)) {
if (c == '\n') {
count++;
}
printf(">> line count: %i", count);
fclose(fp);
}
looked also at: Pointer best practice

It's normal, you need to rewind() the file. The problem is that when the for loop start the file has reached the end, so reads will fail and feof() will return non-zero.
Two options
rewind(fp);
Or
fseek(fp, 0L, SEEK_SET);
When you call fopen() again you leak resources because you overwrite the pointer and now you can't fclose() the first fopen()ed file.

You could use rewind() as #iharob suggested, or you could just close and re-open the file:
while (l < MAXLINES && fgets(lines[l], sizeof(lines[0]), fp)) {
lines[l][strlen(lines[l])-1] = '\0';
puts(lines[l]);
l++;
}
fclose(fp);
fp = fopen(argv[1], "r"); // below won't output unless fopen again
for (c = getc(fp); c != EOF; c = getc(fp)) {
if (c == '\n') {
count++;
}
}

Related

How to convert a text file from DOS format to UNIX format

I am trying to make a program in C, that reads a text file and replace \r\n with \n to the same file converting the line ending from DOS to UNIX. I use fgetc and treat the file as a binary file. Thanks in advance.
#include <stdio.h>
int main()
{
FILE *fptr = fopen("textfile.txt", "rb+");
if (fptr == NULL)
{
printf("erro ficheiro \n");
return 0;
}
while((ch = fgetc(fptr)) != EOF) {
if(ch == '\r') {
fprintf(fptr,"%c", '\n');
} else {
fprintf(fptr,"%c", ch);
}
}
fclose(fptr);
}
If we assume the file uses a single byte character set, we just need to ignore all the '\r' characters when converting a text file form DOS to UNIX.
We also assume that the size of the file is less than the highest unsigned integer.
The reason we do these assumptions, is to keep the example short.
Be aware that the example below overwrites the original file, as you asked. Normally you shouldn't do this, as you can lose the contents of the original file, if an error occurs.
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
// Return a negative number on failure and 0 on success.
int main()
{
const char* filename = "textfile.txt";
// Get the file size. We assume the filesize is not bigger than UINT_MAX.
struct stat info;
if (stat(filename, &info) != 0)
return -1;
size_t filesize = (size_t)info.st_size;
// Allocate memory for reading the file
char* content = (char*)malloc(filesize);
if (content == NULL)
return -2;
// Open the file for reading
FILE* fptr = fopen(filename, "rb");
if (fptr == NULL)
return -3;
// Read the file and close it - we assume the filesize is not bigger than UINT_MAX.
size_t count = fread(content, filesize, 1, fptr);
fclose(fptr);
if (count != 1)
return -4;
// Remove all '\r' characters
size_t newsize = 0;
for (long i = 0; i < filesize; ++i) {
char ch = content[i];
if (ch != '\r') {
content[newsize] = ch;
++newsize;
}
}
// Test if we found any
if (newsize != filesize) {
// Open the file for writing and truncate it.
FILE* fptr = fopen(filename, "wb");
if (fptr == NULL)
return -5;
// Write the new output to the file. Note that if an error occurs,
// then we will lose the original contents of the file.
if (newsize > 0)
count = fwrite(content, newsize, 1, fptr);
fclose(fptr);
if (newsize > 0 && count != 1)
return -6;
}
// For a console application, we don't need to free the memory allocated
// with malloc(), but normally we should free it.
// Success
return 0;
} // main()
To only remove '\r' followed by '\n' replace the loop with this loop:
// Remove all '\r' characters followed by a '\n' character
size_t newsize = 0;
for (long i = 0; i < filesize; ++i) {
char ch = content[i];
char ch2 = (i < filesize - 1) ? content[i + 1] : 0;
if (ch == '\r' && ch2 == '\n') {
ch = '\n';
++i;
}
content[newsize++] = ch;
}

Basic File IO in C Produces all a's

I am using CodeBlocks on Windows to compile.
Why the program gives me this answer? Why there are so much as and don't get the answer 123456abcdef?
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE *fp;
char s[100] = "abcdef";
char c1 = '0';
int i = 0;
fp = fopen("ot.txt", "w");
if (fp == NULL) {
printf("file open error");
exit(0);
}
while (s[i] != '\0') {
fputc(s[i], fp);
i++;
printf("%d", i);
}
while (c1 != EOF) {
c1 = fgetc(fp);
putchar(c1);
}
fclose(fp);
}
There are multiple problems in your code:
c1 should be defined with type int to accommodate for all values returned by fgetc(). a char cannot unambiguously store EOF.
You should open the file in write+update mode "w+"
You should rewind the stream pointer before reading back from it for 2 reasons: a seek operation is required between read and write operations and you want to read the characters from the start of the file.
You need to test for EOF after reading a byte with fgetc(), otherwise you will output the EOF converted to unsigned char to stdout before exiting the loop.
It is good style to return 0; from main() to indicate success and non-zero to indicate failure.
Here is a corrected version:
#include <stdio.h>
int main(void) {
FILE *fp;
char s[] = "abcdef";
int i, c;
fp = fopen("ot.txt", "w+");
if (fp == NULL) {
printf("file open error\n");
return 1;
}
i = 0;
while (s[i] != '\0') {
fputc(s[i], fp);
i++;
printf("%d", i);
}
rewind(fp);
while ((c1 = fgetc(fp)) != EOF) {
putchar(c1);
}
printf("\n");
fclose(fp);
return 0;
}

C file to check if file is empty or contains ASCII text

I'm trying to write a program which should be able to take a file as input in terminal and then determine if the file is empty or written in ASCII text. But I keep getting segmentation fault 11.
My code is as follows:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
unsigned char c;
int size;
FILE *file = fopen(argv[1], "r");
fseek(&c, 0, SEEK_END);
size = ftell(file);
if (size == 0)
{
printf("file is empty\n");
}
fclose(file);
FILE *file = fopen(argv[1], "r");
c = fgetc(file);
if (c != EOF && c <= 127)
{
printf("ASCII\n");
}
fclose(file);
}
Any ideas as to why?
1] fseek doesnt have first argument unsgined char*, but FILE*.
fseek(file, 0, SEEK_END);
2] You shouldn't use unsigned char / char for checking for EOF, use int for sure.
3] Working and simplier code
int main(int argc, char *argv[])
{
if (argc < 2)
{
// err we havent filename
return 1;
}
int c;
FILE *file = fopen(argv[1], "r");
if (file == NULL)
{
// err failed to open file
return 1;
}
c = fgetc(file);
if (c == EOF)
{
printf("empty\n");
fclose(file);
return 0;
}
else
{
ungetc(c, file);
}
while ((c = fgetc(file)) != EOF)
{
if (c < 0 || c > 127)
{
// not ascii
return 1;
}
}
printf("ascii\n");
fclose(file);
return 0;
}
fseek(&c, 0, SEEK_END);
Here you are supposed to pass file descriptor, like
fseek(file, 0, SEEK_END);
fseek takes a FILE* as a parameter and you are giving it a unsigned char* - change &c to file.

Compare each line from two different files and print the lines that are different in C

Supposing that I have two files like this:
file1.txt
john
is
the new
guy
file2.txt
man
the old
is
rick
cat
dog
I'd like to compare first line from file1 with all the lines from file2 and verify if it exist. If not, go two the second line from file1 and compare it with all the lines from file2.. and so on until eof is reached by file1.
The output that I expect is:
john
the new
guy
How I thought this should be done:
read file1 and file2
create a function which returns the line number of each of them
take the first line from file1 and compare it to all the lines from file2
do this until all the lines from file1 are wasted
Now, I don't know what I'm doing wrong, but I don't get the result that I expect:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
Could someone please point me into the right direction ? For testing purposes I created a counter at the end which was a part of a small debug. There should be the print() function
As per #chux answer I got the following simplified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
The above code is giving me the following output, which is not what is expected:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
Problems with OP's code
Imprecise definition of line.
Excessive recalculation
Fuzzy determination of the number of lines in a file.
Unlike string, which has a precise definition in C, reading a line is not so well defined. The primary specificity issue: does a line contain the trailing '\n'. If the first answer is Yes, then does the last text in a file after a '\n' constitute a line? (Excessively long lines are another issue, but let us not deal with that today.)
Thus possibly some lines end with '\n' and others do not, fooling strcmp("dog", "dog\n").
The easiest solution is to read a line until either 1) a '\n' is encountered, 2) EOF occurs or 3) line buffer is full. Then after getting a line, lop off the potential trailing '\n'.
Now all lines code subsequently works with have no '\n'.
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP's loop is incredible wasteful. Consider a file with 1000 lines. Code will loop, calling 1000 times countlines() (each countlines() call reads 1000 lines) times when one countlines() call would suffice.
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
There really is no need to count the line anyways, just continue until EOF (fgets() returns NULL). So no need to fix its fuzzy definition. (fuzzy-ness concerns same issues as #1)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
Other simplifications possible - for another day.
FWIW, following counts lines of text allowing the last line in the file to optionally end with a '\n'.
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
Note that this function may get a different result that fgets() calls. Consider a file of one line of 150 characters. fgets(..., 100,...) will report 2 lines. FileLineCount() reports 1.
[Edit] Updated code to conform to OP functionality.
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
This program prints the diff of two files file1.txt and file2.txt.
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
You already have a very good answer (and always will from chux), but here is a slightly different approach to the problem. It uses automatic storage to reading file2 into an array of strings and then compares each line in file1 against every line in file2 to determine whether it is unique. You can easily convert the code to dynamically allocate memory, but for sake of complexity that was omitted:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
Look over the code and let me know if you have any questions.
Example Use/Output
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.

Number of Lines Mystery?

My goal is to implement a function that calculates the number of lines in a file. And empty file is considered to have no lines. If the last line of the given file is not empty, it should be counted as a line despite not ending with a newline character.
I've come up with the following code:
int linecount(const char *filename)
{
FILE *f = fopen(filename, "r");
if(!f)
return -1;
int lines = 0;
int c = 0;
int n = 0;
while((c = fgetc(f)) != EOF){
if(c == '\n')
lines++;
n++;
}
if(n==0)
return 0; //return 0 if the file is empty
if(c!='\n' && !isspace(c))
lines++; //count the last line if it's not empty
fclose(f);
return lines;
}
However, even after playing with it for over an hour I can't figure out why its return value lines is one too large in some cases...
You were close, here how you could do it:
int linecount(const char *filename) {
FILE *f = fopen(filename, "r");
if (!f)
return -1;
int lines = 0;
int c = 0;
int n = 0;
int read_line = 0;
while ((c = fgetc(f)) != EOF) {
read_line = 1;
if (c == '\n') {
lines++;
read_line = 0;
}
n++;
}
if (n == 0)
return 0; //return 0 if the file is empty
if(read_line)
lines++;
fclose(f);
return lines;
}
The idea is that we want to know if we started reading a line AND if we met a newline, at end of this line. So, we use another variable, called read_line and we use it as a flag.
We set it to 1 (true) if we just started reading a line and we set it to 0 (false) if we just met a newline (end of the line).
Now, if we have something like:
1[newline]
2[newline]
3
we will be OK, since we need to check if read_line after we read the file. Is so, we have to increment our line counter by one.
This is also OK:
1[newline]
2[newline]
3[newline]
since we saw three newlines and the read_line is 0 after we read the file.
Same goes for this case:
1[newline]
2[newline]
3[newline]
[nothing here]
since our flag is going to be equal to 0 after reading the file, since the 3rd newline should set it to 0 and we never actually enter the 4th line in our loop, since there is nothing to read.
With your previous implementation, as stated in the comments, this line:
if(c!='\n' && !isspace(c))
would be executed with c being equal to EOF.
Or you could just use fgets() and you are done. Check the example:
#include <stdio.h>
#include <string.h>
#define bufSize 1024
int main(int argc, char *argv[])
{
FILE *fp;
char buf[bufSize];
if ((fp = fopen("test.txt", "rb")) == NULL)
{ /* Open source file. */
perror("fopen source-file");
return 1;
}
int lines = 0;
while (fgets(buf, sizeof(buf), fp) != NULL)
{ /* While we don't reach the end of source. */
/* Read characters from source file to fill buffer. */
/* fgets will stop when it finds a newline. */
lines++;
}
printf("lines = %d\n", lines);
fclose(fp);
return 0;
}
Modify sample
int linecount(const char *filename)
{
FILE *f = fopen(filename, "r");
if(!f)
return -1;
int lines = 0;
int c = 0;
int flag = 1;
while((c = fgetc(f)) != EOF){
if(flag = (c == '\n'))
lines++;
}
if(!flag)
lines++; //count the last line if it's not empty
fclose(f);
return lines;
}
A simple solution can be
int linecount(const char *filename)
{
FILE *stream;
char *line = NULL;
size_t len = 0;
ssize_t read;
int numOfLines = 0;
stream = fopen(filename, "r");
if (stream == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, stream)) != -1) {
numOfLines++;
}
free(line);
fclose(stream);
return numOfLines;
}

Resources