Possible errors when reading larger lines - c

I noticed a problem with the next line while ((read = getline(&line, &len, fp)) != -1) {: I may get errors if the lines read are too large. Should I read MAXLINE pieces? I think it's a problem if I break the original line into pieces. It is possible to "cut" the fixed line in the middle of the word I am looking for.
For example, if you break the car for sale (you are looking for ) in car fo and r sale. I will not find for in either of the two pieces. Maybe a solution would be for each part read, to read extra len (search_word) characters (without changing the original place where the next piece would have started). Basically I read more r s in the first part, but it guarantees that I will find for
How can I handle possible errors?
The following program is based on the implementation of grep from linux, doing a search for several words
program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char** argv)
{
if (argc < 3)
{
printf("Usages:\n1: ./grep.o <word> <file>\n2: ./grep.o -e <word> [-e "
"<word>...] <file>\n");
return 1;
}
else
{
FILE *fp;
char *line = NULL;
size_t len = 0;
ssize_t read;
int i;
fp = fopen(argv[argc - 1], "r");
if (fp == NULL)
{
printf("File doesn't exist or cannot be read!\n");
return 1;
}
if (argc == 3)
{
while ((read = getline(&line, &len, fp)) != -1)
{
if (strstr(line, argv[1]) != NULL)
printf("%s", line);
}
}
else
{
for (i = 1; i < argc - 2; i += 2)
{
if (strcmp(argv[i], "-e") != 0)
{
printf("The option must be \"-e\"!\n");
fclose(fp);
return 1;
}
}
if (i + 1 == argc - 1)
{
if (strcmp(argv[i], "-e") != 0)
{
printf("The option must be \"-e\"!\n");
}
else
{
printf("The option must have a word after it!\n");
}
fclose(fp);
return 1;
}
while ((read = getline(&line, &len, fp)) != -1)
{
for (i = 1; i < argc - 2; i += 2)
{
if (strstr(line, argv[i + 1]) != NULL)
{
printf("%s", line);
break;
}
}
}
fclose(fp);
if (line)
free(line);
}
return 0;
}
}

The problem you describe is not present in your code precisely because you use getline() instead of fgets(). getline() reallocates the line pointer and len as required to read a full line. The only limitation is memory, which is unlikely to pose a problem on current systems.
Note however that you should close the file and free line outside of the else branch to avoid a memory leak.

Related

grep implementation for multi-word search

I wrote a program that implements the grep command. I tried to search by several words. I use the -e WORD option to search by word in the target file, so if the option is present several times in the command line you can search for more words grep will display any line that contains at least one searched word.
I would like you to criticize the implementation of this option (-e WORD). What possible problems may arise?
Something tells me that I could have implemented it even better
I think the treatment of this grep option could have been better written
code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char** argv){
if(argc < 3) {
printf("Usages:\n1: ./grep.o <word> <file>\n2: ./grep.o -e <word> [-e <word>...] <file>\n");
return 1;
}
else {
FILE* fp;
char* line = NULL;
size_t len = 0;
ssize_t read;
int i;
fp = fopen(argv[argc - 1], "r");
if(fp == NULL) {
printf("File doesn't exist or cannot be read!\n");
return 1;
}
if(argc == 3) {
while((read = getline(&line, &len, fp)) != -1) {
if(strstr(line, argv[1]) != NULL)
printf("%s", line);
}
}
else {
for(i = 1; i < argc - 2; i += 2) {
if(strcmp(argv[i],"-e") != 0) {
printf("The option must be \"-e\"!\n");
fclose(fp);
return 1;
}
}
if(i + 1 == argc - 1) {
if(strcmp(argv[i],"-e") != 0) {
printf("The option must be \"-e\"!\n");
}
else {
printf("The option must have a word after it!\n");
}
fclose(fp);
return 1;
}
while((read = getline(&line, &len, fp)) != -1) {
for(i = 1; i < argc - 2; i += 2) {
if(strstr(line, argv[i + 1]) != NULL) {
printf("%s", line);
break;
}
}
}
fclose(fp);
if(line)
free(line);
}
return 0;
}
}

Replace string in binary file

I am trying to write a *nix program that copies itself and replaces a string inside the binary. The copy process doesn't seem to work though.
Here's the code:
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <stdio.h>
#include <fcntl.h>
#define BUFSIZE 10
#define FILENAME "token"
void findstring(const char *exe, const char* str)
{
char buf[BUFSIZE];
int line_num = 1;
int i = 0, find_result = 0;
FILE *fp = fopen(exe, "rb");
if(fp == NULL)
exit(-1);
FILE *out = fopen("out", "wb");
if(out == NULL)
exit(-1);
while(fgets(buf, BUFSIZE, fp) != NULL) {
if((strstr(buf, str)))
{
printf("A match found on line: %d\n", line_num);
printf("\n%s\n", buf);
find_result++;
// reverse "token" string in the output
for(i = 0; i< BUFSIZE; i++)
{
if(strstr(&buf[i], "t") != NULL)
buf[i] = 'n';
else if(strstr(&buf[i], "o") != NULL)
buf[i] = 'e';
else if(strstr(&buf[i], "k") != NULL)
buf[i] = 'k';
else if(strstr(&buf[i], "e") != NULL)
buf[i] = 'o';
else if(strstr(&buf[i], "n") != NULL)
buf[i] = 't';
}
}
line_num++;
fputs(buf, out);
}
if(find_result == 0) {
printf("\nSorry, couldn't find a match.\n");
}
fclose(fp);
fclose(out);
}
int main(int argc, char **argv, char **envp)
{
// argv[1] = FILENAME;
char buf[1024];
int fd, rc;
findstring(argv[0], "token");
if(argc == 1) {
printf("\n\n%s [file to read]\n\n", argv[0]);
exit(1);
}
printf("FILENAME macro = %s", FILENAME);
if(strstr(argv[1], "token") != NULL) {
printf("\n\nYou may not access '%s'\n\n", argv[1]);
exit(2);
}
fd = open(argv[1], O_RDONLY);
if(fd == -1) {
printf("\n\nUnable to open %s\n\n", argv[1]);
exit(3);
}
rc = read(fd, buf, sizeof(buf));
if(rc == -1) {
printf("\n\nUnable to read fd %d\n\n", fd);
exit(4);
}
write(1, buf, rc);
return 0;
}
"Token" string should be reversed in the output binary ("nekot"), with the findstring function responsible of performing this task.
It is also worth noting that the number of matches found strictly depends on the BUFSIZE constant.
What is this code missing?
Thanks
consider what this does:
if(strstr(&buf[i], "t") != NULL)
buf[i] = 'n';
This will search the buffer starting at index i, and if the string "t" appears anywhere in the buffer, it will replace the first character with n. So if your buffer has
a string with token inside.
the first iteration of the for loop will change it to
n string with token inside.
as the loop proceeds you'll get
nnnnnnnnnnith token inside.
after 10 iterations and ultimately
nnnnnnnnnnnnnnnekooooooooo.
Other issues:
fgets reads a string up to a newline or up to BUFSIZE-1 characters. There may well be bytes that are equivalent to newline chars.
You're scanning through BUFSIZE bytes regardless of how many bytes you read.
fputs will write up to the first NUL byte. If there are NUL bytes anywhere in your input binary, stuff after the NUL in the buffer will be lost.
The above means you probably want to use fread/fwrite instead of fgets/fputs, and you want to carefully check return values for shot read or writes.
1.
All C style string functions break at first '\0' . So if buf contains null character before Your goal, will be never found.
if((strstr(buf, str))) { ... }
I suggest loop with step one character (byte) coded by hand, or functions from family memXXXXcmp etc
If Your token is over boundaries of two buffers (from two loo[ iterations), no comparison can fount is

Compare each line from two different files and print the lines that are different in C

Supposing that I have two files like this:
file1.txt
john
is
the new
guy
file2.txt
man
the old
is
rick
cat
dog
I'd like to compare first line from file1 with all the lines from file2 and verify if it exist. If not, go two the second line from file1 and compare it with all the lines from file2.. and so on until eof is reached by file1.
The output that I expect is:
john
the new
guy
How I thought this should be done:
read file1 and file2
create a function which returns the line number of each of them
take the first line from file1 and compare it to all the lines from file2
do this until all the lines from file1 are wasted
Now, I don't know what I'm doing wrong, but I don't get the result that I expect:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
Could someone please point me into the right direction ? For testing purposes I created a counter at the end which was a part of a small debug. There should be the print() function
As per #chux answer I got the following simplified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
The above code is giving me the following output, which is not what is expected:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
Problems with OP's code
Imprecise definition of line.
Excessive recalculation
Fuzzy determination of the number of lines in a file.
Unlike string, which has a precise definition in C, reading a line is not so well defined. The primary specificity issue: does a line contain the trailing '\n'. If the first answer is Yes, then does the last text in a file after a '\n' constitute a line? (Excessively long lines are another issue, but let us not deal with that today.)
Thus possibly some lines end with '\n' and others do not, fooling strcmp("dog", "dog\n").
The easiest solution is to read a line until either 1) a '\n' is encountered, 2) EOF occurs or 3) line buffer is full. Then after getting a line, lop off the potential trailing '\n'.
Now all lines code subsequently works with have no '\n'.
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP's loop is incredible wasteful. Consider a file with 1000 lines. Code will loop, calling 1000 times countlines() (each countlines() call reads 1000 lines) times when one countlines() call would suffice.
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
There really is no need to count the line anyways, just continue until EOF (fgets() returns NULL). So no need to fix its fuzzy definition. (fuzzy-ness concerns same issues as #1)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
Other simplifications possible - for another day.
FWIW, following counts lines of text allowing the last line in the file to optionally end with a '\n'.
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
Note that this function may get a different result that fgets() calls. Consider a file of one line of 150 characters. fgets(..., 100,...) will report 2 lines. FileLineCount() reports 1.
[Edit] Updated code to conform to OP functionality.
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
This program prints the diff of two files file1.txt and file2.txt.
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
You already have a very good answer (and always will from chux), but here is a slightly different approach to the problem. It uses automatic storage to reading file2 into an array of strings and then compares each line in file1 against every line in file2 to determine whether it is unique. You can easily convert the code to dynamically allocate memory, but for sake of complexity that was omitted:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
Look over the code and let me know if you have any questions.
Example Use/Output
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.

"aborted (core dumped)" in all programs

yesterday, during programming time everything was okay, but today I get weird error. I do not know why but after running my programs, in terminal i get this error "aborted (core dumped)", also I run programs which are already done and the problem is the same.
Example of the program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define CHUNK 12
char *getWord(FILE *infile);
int main(int argc, char *argv[])
{
char *word;
FILE *infile, *outfile;
int n = 0;
if(argc != 2)
{
printf("Error! Type:./file_name input_file output_file\n");
abort();
}
infile = fopen(argv[1], "r");
if(infile != NULL)
{
outfile = fopen(argv[2], "w");
if(outfile == NULL)
{
printf("Error! Cannot open the output_file\n");
abort();
}
else
{
while(!feof(infile))
{
word = getWord(infile);
if(word == NULL)
{
free(word);
abort();
}
n++;
if(n % 2 == 0)
{
fputs(word, outfile);
fputs(" ", outfile);
}
else
{
fputs(word, outfile);
fputs("(", outfile);
fputs(word, outfile);
fputs(")", outfile);
fputs(" ", outfile);
}
free(word);
}
}
}
else
{
printf("Error! Cannot open the input_file\n");
abort();
}
fclose(infile);
fclose(outfile);
return 0;
}
char *getWord(FILE *infile)
{
char *word, *word2;
int length, cursor, c;
word = malloc(sizeof(char)*CHUNK);
if(word == NULL)
{
return NULL;
}
length = CHUNK;
cursor = 0;
while(isalpha(c = getc(infile)) && !feof(infile))
{
word[cursor] = c;
cursor++;
if(cursor >= length)
{
length += CHUNK;
word2 = realloc(word, length*sizeof(char));
if(word2 == NULL)
{
free(word2);
return NULL;
}
else word2 = word;
}
}
ungetc(c, infile);
word[cursor] = '\0';
return word;
}
and the error:
Error! Type:./file_name input_file output_file
Aborted (core dumped)
The logic in your realloc is wrong.
word2 = realloc(word, length*sizeof(char));
if(word2 == NULL)
{
free(word2);
return NULL;
}
else word2 = word;
should be
word2 = realloc(word, cursor);
if(word2 == NULL)
{
free(word);
return NULL;
}
word = word2;
There are a few changes here
word starts out having length bytes allocated so there is no point in reallocating it to the same size. The variable which tracks string size is cursor so you need to reallocate to match its size.
(minor) There is no need to use sizeof(char) to help calculate the size of an allocation - this is guaranteed to be 1
If realloc fails, you need to free the original pointer, not the new one (which you know is NULL).
If the reallocation succeeds, your heap cell may have been moved, leaving word pointing to memory you don't own. The rest of the function operates on word so you need to update it to point to your new buffer (word2)
As for why this worked for you previously, the above code results in undefined behaviour in a number of places. Sometimes you're unlucky and this appears to work correctly.
If your command requires 2 parameters, you need to check for argc != 3 since the command name itself is considered an argument. If you are giving it 2 parameters, then your check on argc != 2 is failing and you're getting your error message, and the core dump due to the abort call.
Rather than abort, you should call exit with a non-zero parameter. E.g.,
if(argc != 3)
{
printf("Error! Type: %s input_file output_file\n", argv[0]);
exit(1);
}

C fopen() and fgets() issue

I am trying to read two files in my program I wrote earlier, but it always fails.
char line[BUFSIZ];
FILE *fp2=freopen("source.dat","r");
if(fp2==NULL)
printf("Problm opening: source.dat");
FILE *fp3=freopen("result.dat", "r");
if(fp3==NULL)
printf("Problm opening: result.dat");
char line2[BUFSIZ];
int len;
while( (fgets(line2, BUFSIZ, fp2) != NULL) && (fgets(line, BUFSIZ, fp3) != NULL)) {
len=strlen(line);
if( line[len - 1] == '\n' ) line[len-1] = '\0'; len=strlen(line2);
if( line2[len - 1] == '\n' ) line2[len-1] = '\0';
rename(line, line2);
}
I'm not sure why, I know my program writes the two files I want to open. It just doesn't get past the while loop.
freopen takes 3 arguments viz., filename, mode and FILE Stream object. Hence, to reopen a file, it should already be open. If we invoke a freopen as the first call, the runtime may throw an exception of uninitialized access.
Modifying the code as below
fp2 = fopen("source.dat", "r");
fp3 = fopen("result.dat", "r");
I am able to run your code without any problem and control continues beyond the while loop. The files stored in first file are renamed to the names stored in second file, which I presume is the objective of your program.
This code works for me and apparently should do the same yours does, except where noted.
The first note apparently was the right one :-)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(void)
{
char line[2][BUFSIZ];
FILE *fp[2];
char *file[2] = { "source.dat", "result.dat" };
int f, finished = 0;
// Any pointers to source and result must be flushed and closed at this point,
// i.e. if this same program has created the files, it must now close them.
for (f = 0; f < 2; f++)
{
if (NULL == (fp[f] = fopen(file[f],"r")))
{
fprintf(stderr, "Error opening %s\n", file[f]);
exit(-1);
}
}
while(!finished)
{
int len;
for (f = 0; f < 2 && (!finished); f++)
{
if (NULL == fgets(line[f], BUFSIZ, fp[f]))
{
fprintf(stderr, "NULL on %s\n", file[f]);
finished = 1;
break;
}
if (feof(fp[f]))
{
fprintf(stderr, "end of %s\n", file[f]);
finished = 1;
break;
}
len = strlen(line[f]);
// if one of the file contains an empty line, program might crash
if (0 == len)
{
fprintf(stderr, "empty line in %s\n", file[f]);
finished = 1;
break;
}
if ('\n' == line[f][len-1])
line[f][len-1] = 0x0;
}
if (finished)
break;
fprintf(stderr, "Rename(%s, %s)\n", line[0], line[1]);
// rename(line, line2);
}
for (f = 0; f < 2; f++)
fclose(fp[f]);
return 0;
}

Resources