Replace string in binary file - c

I am trying to write a *nix program that copies itself and replaces a string inside the binary. The copy process doesn't seem to work though.
Here's the code:
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <stdio.h>
#include <fcntl.h>
#define BUFSIZE 10
#define FILENAME "token"
void findstring(const char *exe, const char* str)
{
char buf[BUFSIZE];
int line_num = 1;
int i = 0, find_result = 0;
FILE *fp = fopen(exe, "rb");
if(fp == NULL)
exit(-1);
FILE *out = fopen("out", "wb");
if(out == NULL)
exit(-1);
while(fgets(buf, BUFSIZE, fp) != NULL) {
if((strstr(buf, str)))
{
printf("A match found on line: %d\n", line_num);
printf("\n%s\n", buf);
find_result++;
// reverse "token" string in the output
for(i = 0; i< BUFSIZE; i++)
{
if(strstr(&buf[i], "t") != NULL)
buf[i] = 'n';
else if(strstr(&buf[i], "o") != NULL)
buf[i] = 'e';
else if(strstr(&buf[i], "k") != NULL)
buf[i] = 'k';
else if(strstr(&buf[i], "e") != NULL)
buf[i] = 'o';
else if(strstr(&buf[i], "n") != NULL)
buf[i] = 't';
}
}
line_num++;
fputs(buf, out);
}
if(find_result == 0) {
printf("\nSorry, couldn't find a match.\n");
}
fclose(fp);
fclose(out);
}
int main(int argc, char **argv, char **envp)
{
// argv[1] = FILENAME;
char buf[1024];
int fd, rc;
findstring(argv[0], "token");
if(argc == 1) {
printf("\n\n%s [file to read]\n\n", argv[0]);
exit(1);
}
printf("FILENAME macro = %s", FILENAME);
if(strstr(argv[1], "token") != NULL) {
printf("\n\nYou may not access '%s'\n\n", argv[1]);
exit(2);
}
fd = open(argv[1], O_RDONLY);
if(fd == -1) {
printf("\n\nUnable to open %s\n\n", argv[1]);
exit(3);
}
rc = read(fd, buf, sizeof(buf));
if(rc == -1) {
printf("\n\nUnable to read fd %d\n\n", fd);
exit(4);
}
write(1, buf, rc);
return 0;
}
"Token" string should be reversed in the output binary ("nekot"), with the findstring function responsible of performing this task.
It is also worth noting that the number of matches found strictly depends on the BUFSIZE constant.
What is this code missing?
Thanks

consider what this does:
if(strstr(&buf[i], "t") != NULL)
buf[i] = 'n';
This will search the buffer starting at index i, and if the string "t" appears anywhere in the buffer, it will replace the first character with n. So if your buffer has
a string with token inside.
the first iteration of the for loop will change it to
n string with token inside.
as the loop proceeds you'll get
nnnnnnnnnnith token inside.
after 10 iterations and ultimately
nnnnnnnnnnnnnnnekooooooooo.
Other issues:
fgets reads a string up to a newline or up to BUFSIZE-1 characters. There may well be bytes that are equivalent to newline chars.
You're scanning through BUFSIZE bytes regardless of how many bytes you read.
fputs will write up to the first NUL byte. If there are NUL bytes anywhere in your input binary, stuff after the NUL in the buffer will be lost.
The above means you probably want to use fread/fwrite instead of fgets/fputs, and you want to carefully check return values for shot read or writes.

1.
All C style string functions break at first '\0' . So if buf contains null character before Your goal, will be never found.
if((strstr(buf, str))) { ... }
I suggest loop with step one character (byte) coded by hand, or functions from family memXXXXcmp etc
If Your token is over boundaries of two buffers (from two loo[ iterations), no comparison can fount is

Related

Problem reading numbers from file using stdin in C

I have to read in a stream of numbers from a text file into my code using stdin by passing my program the file like ./msort <segment count> <file.txt.
I tried 2 methods of reading in the numbers, however these methods only work when I read them line by line as strings. I tried following a few links to type and typecast the string to an int, however I had no luck. The integer outputs are all just garbled. This is the code:
#define BUFFERSIZE 10
int main(int argc, char **argv) {
if (argc != 2) {
printf("Usage: %s <segment count>\n", argv[0]);
return 1;
}
char *line = NULL;
size_t size;
while (getline(&line, &size, stdin) != -1) {
line[strcspn(line, "\n")] = 0;
printf("%d\n", (int)*line);
}
// char *text = calloc(1, 1), buffer[BUFFERSIZE];
// while (fgets(buffer, BUFFERSIZE, stdin))
// {
// text = realloc(text, strlen(text) + 1 + strlen(buffer));
// strcat(text, buffer);
// buffer[strcspn(buffer, "\n")] = 0;
// printf("%d\n", (int)*buffer);
// }
return 0;
}
Could you please help me out with this?
screenshot of code and text file
Use strtol to read numbers from strings:
#include <errno.h> // errno
#include <stdlib.h> // strtol
// ...
char *line = NULL;
size_t size = 0;
errno = 0; // Reset errno before calling strtol.
char *endptr; // Used to check where strtol ended reading a number.
while (getline(&line, &size, stdin) != -1) {
long val = strtol(line, &endptr, 10); // Read a number from the line.
// Double-check that a number was indeed read. If you don't want to errorcheck,
// you can remove this, take out "#include <errno.h>", "errno = 0" and replace
// &endptr with NULL.
if (errno != 0) {
fprintf(stderr, "strtol: %s\n", strerror(errno));
return 1;
} else if (endptr == line) {
fprintf(stderr, "strtol: no digits were found\n");
return 1;
}
printf("%d\n", val);
}
I think there are some problems with your code.
You want to read lines from file specified as command-line argument(argv[1]) but you're passing stdin as parameter. you should open the file using fopen and use that file pointer in getline method like shown below.
You can then use sscanf to read integer values from the line string.
FILE *pFile = fopen(argv[1], "r");
if (!pFile) {
printf("Error opening file %s\n", argv[1]);
return EXIT_FAILURE;
}
int currentNum;
while (getline(&line, &size, pFile) != -1) {
line[strcspn(line, "\n")] = 0;
sscanf(line, "%d", &currentNum);
printf("%d\n", currentNum);
}

Strcmp not comparing strings from argv

** Updated 26/10 -> First of all thank you all for you help, I am getting closer now, I need more work and studying but I really appreciate you are helping me a lot :-)
Still don't know why the first "rain" word in the input.txt file is not getting the positive output from strcmp, and from the cmd I can see the "<" bracket don't appear except for the LAST line which is the line that works.
Also checked the highlighted response from Removing trailing newline character from fgets() input
and even if I change the code to the following:
while( fgets (line, sizeof line, fp)!=NULL ) {
/* remove \n from at the end of the str buffer*/
char * pos;
/*
if ((pos = strchr(line, '\n')) != NULL)
*pos = '\0';
*/
line[strcspn(line, "\n")] = 0;
I get the same result as if I use the if block instead. Maybe I'm getting extra \0 which might be the case. Anyone has a link where I can read about the delimiters I just used, or a nice reference of a debugger, etc. ... which I will have a look as soon as I come here? Thank you so much in advance!
read5.c version: Now from that input.txt file, it had an extra space on the last "rain" word, I removed the space, and it was able to find and get that last word compare as a true result, running in the strcmp if block. but that was the only string that was a true positive result from that if block.
on the cmd I can see:
$./read5 input.txt rain output.txt sun
>Maria
>rain
>manel
>Bla bla
<rain>
Found it! rain
On the output.txt it becomes:
Maria
rain
manel
Bla bla
sun
read5.c
#include <stdio.h>
#include <string.h>
/**
* Compile program:
* gcc read3.c -o read3
*
*/
int main (int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[246];
if (argc <= 4){
printf(">Missing arguments on the command line.\n");
printf(">Be sure you run the program as\n\"./read3 input.txt compare outout.txt replace\"\n\n");
}
/* opening file for reading */
fp = fopen(argv[1] , "r");
if(fp == NULL){
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if(fo == NULL){
perror("Error opening output file");
return 1; //TODO check if: return 1 because it was expected, right?
}
replace = argv[4];
/*
printf(); made to test version 2
//printf("We are going to compare %s\n", compare);
//printf("We are going to replace it with %s\n", replace);
*/
while( fgets (line, sizeof line, fp)!=NULL ) {
/* remove \n from at the end of the str buffer*/
char * pos;
if ((pos = strchr(line, '\n')) != NULL)
*pos = '\0';
/* print str enclosed in <> so we can see what str actually contains */
//printf("Inside the loop, got the string: %s\n", line);
//printing the strings with defined delimiters
printf("<%s>\n", line);
if(strcmp(compare, line) == 0){
printf("Found it! %s \n", line);
fprintf(fo, "%s\n", replace);
}
else{
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
First question with no edits: 25/10
I need to make a program that is run like this:
./read2 input.txt rain output.txt sun
It reads the input.txt, searches for rain string and if finds it, replaces it with sun string and outputs all the text from input.txt with the replacements to the output.txt.
But with the code that I have so far, the strcmp is not comparing the strings I want, maybe it has the extra space that I get on the command line, I don't know... for now what is doing is copying everything from input.txt to output.txt... It's running the else block always...
Read2.c:
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv) {
FILE *fp, *fo;
char str[60];
//char* token;
/* opening file for reading */
fp = fopen(argv[1], "r");
char *compare = argv[2];
fo = fopen(argv[3], "w+");
char *replace = argv[4];
if (fp == NULL) {
perror("Error opening file");
return(-1);
}
//printf("We are going to compare %s\n", compare);
//printf("We are going to replace it with %s\n", replace);
while (fgets(str, 60, fp) != NULL) {
/* writing content to stdout */
//Take the \n out
//token = strtok(str, "\n");
printf("Inside the loop, got the string: %s\n", str);
if (strcmp(compare, str) == 0) {
//puts(str);
printf("Found it! %s \n", str);
fprintf(fo, "%s", replace);
} else {
fprintf(fo, "%s", str);
}
}
fclose(fp);
return(0);
}
input.txt:
Maria
rain
manel
Bla bla
rain
Ouput.txt becomes exactly as input.txt and before it was empty, so the code is working, except the if block that tests with strcmp.
The problem is the \n at the end of the str buffer. fgets adds the \n at end end of the line it reads, you need to get rid of it before comparing.
This is what you need:
while (fgets(str, 60, fp) != NULL) {
/* remove \n from at the end of the str buffer*/
char *pos;
if ((pos = strchr(str, '\n')) != NULL)
*pos = '\0';
/* print str enclosed in <> so we can see what str actually contains */
printf("Inside the loop, got the string: <%s>\n", str);
if (strcmp(compare, str) == 0) {
printf("Found it! %s\n", str);
fprintf(fo, "%s\n", replace);
}
else {
fprintf(fo, "%s\n", str);
}
}
Look at the comments in the code for explanations.
read.c
#include <stdio.h>
#include <string.h>
/**
* How to compile program:
* gcc read.c -o read
*
* How to run the program:
* .> ./read input.txt rainy output.txt sunny
* (On Windows MinGW compiler, simply:
* .> read input.txt rainy output.txt sunny - without ./)
*
*/
int main (int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[246];
if (argc <= 4){
printf(">Missing arguments on the command line.\n");
printf(">Be sure you run the program as\n\"./read input.txt compare outout.txt replace\"\n\n");
}
/* Opening files for reading */
fp = fopen(argv[1] , "r");
if(fp == NULL){
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if(fo == NULL){
perror("Error opening output file");
return 1;
}
replace = argv[4];
while( fgets (line, (sizeof line), fp)!=NULL ) {
line[strcspn(line, "\n")] = 0;
if(strcmp(compare, line) == 0){
printf("Found it! %s \n", line);
fprintf(fo, "%s\n", replace);
}
else{
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
/*
Important info
strcspn ::
Locate first occurrence of character in string,
after locating the first occurrence of \n, replaces it by 0.
Sources::
https://stackoverflow.com/questions/2693776/removing-trailing-newline-character-from-fgets-input/28462221#28462221
Used to debug:
.>printf("1st: Reads input.txt, removes '\\n' from fgets, and prints it \n");
.>printf("2nd: Compares each line with 'rainy' \n");
.>printf("<%s>\n", line);
*/
input.txt
cloudy
rainy
chilly
rainy
rainy
Your approach fails because the lines read from the input file contain a trailing newline '\n' that makes the comparison return non zero.
You can strip the newline before comparing with the search string.
Note that there are other problems:
you should verify that enough command line arguments have been passed by testing argc > 4.
there is no need to open the output file in update mode "w+", "w" is simpler and better.
60 bytes is a bit small for the line array, limiting the longest line handled correctly to 58 bytes.
Here is an improved version:
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[256];
if (argc <= 4) {
printf("missing command line arguments\n");
return 1;
}
fp = fopen(argv[1], "r");
if (fp == NULL) {
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if (fo == NULL) {
perror("Error opening output file");
return 1;
}
replace = argv[4];
while (fgets(line, sizeof line, fp) != NULL) {
line[strcspn(line, "\n")] = '\0';
if (strcmp(line, compare) == 0) {
printf("fount it!);
fprintf(fo, "%s\n", replace);
} else {
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
Note that long lines will be broken into chunks that fit in the line array, so there may be false positives with the above naive approach.
You can remove this limitation completely with this inner loop:
int c;
int pos = 0;
int cmplen = strlen(compare);
for (;;) {
c = getc(fp);
if (c == '\n' || c == EOF) {
if (pos == cmplen) {
fprintf(fo, "%s", replace);
} else
if (pos > 0) {
fprintf(fo, "%*s", pos, compare);
}
pos = 0;
if (c == EOF)
break;
} else {
if (pos >= 0) {
if (compare[pos] == (char)c) {
pos++;
continue;
}
if (pos > 0) {
fprintf(fo, "%*s", pos, compare);
}
pos = -1;
}
}
putc(c, fo);
}

system calls read and write in C

I am wondering how to use the system calls read() and write() in C.
I am trying to read in the contents of a pre existing, file within a directory, into a buffer (array) so I can step through the array and determine what type of file was read. I have looked at quite a few different posts on the matter and have not been able to figure out where I am going wrong. I am trying to print out my buffer array at the bottom to make sure it holds the correct contents of a file before stepping though it to determine the file type, but the buffer holds nothing. Any help would be greatly appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
int main(int argc, char *argv[])
{
char *currentDir = NULL;
DIR *myDir = NULL;
struct dirent *myFile = NULL;
struct stat myStat;
const void *buf [1024];
int count;
int currentFile;
if (strcmp(argv[1], "ls") == 0 && argc < 3)
{
currentDir = getenv("PWD");
myDir = opendir(currentDir);
while ((myFile = readdir(myDir)) != NULL)
{
if (myFile->d_name[0] != '.')
{
puts(myFile->d_name);
//printf("%s\n", myFile->d_name);
}
}
closedir(myDir);
}
if (strcmp(argv[1], "ls") == 0 && strcmp(argv[2], "-t") == 0)
{
currentDir = getenv("PWD");
myDir = opendir(currentDir);
while ((myFile = readdir(myDir)) != NULL)
{
if (myFile->d_name[0] != '.')
{
printf("%s\n", myFile->d_name);
stat (myFile->d_name, &myStat);
printf("Last Accessed:\t%s\n", ctime(&myStat.st_atime));
printf("Last Modified:\t%s\n", ctime(&myStat.st_mtime));
printf("Last Changed:\t%s\n", ctime(&myStat.st_ctime));
}
}
closedir(myDir);
}
if (strcmp(argv[1], "ls") == 0 && strcmp(argv[2], "-f") == 0)
{
currentDir = getenv("PWD");
myDir = opendir(currentDir);
while ((myFile = readdir(myDir)) != NULL)
{
//while (count = read(0, buf, 100) > 0)
//{
//}
//write (1, buf, 100);
//printf ("Buffer Holds:\n %s\n", buf);
if (myFile->d_name[0] != '.')
{
while (count = read(myFile->d_name, buf, 100) > 0)
write (1, buf, count);
printf ("Buffer Holds:\n %s\n", buf);
}
}
}
return 0;
}
You need some more parens here:
while (count = read(myFile->d_name, buf, 100) > 0)
try:
while ((count = read(myFile->d_name, buf, 100)) > 0)
Also, recommend using sizeof:
while ((count = read(myFile->d_name, buf, sizeof(buf))) > 0)
But you've declared buf as an array of pointers:
const void *buf [1024];
which doesn't seem likely to be what you actually want. Are there really pointer values stored in the file? I think you probably meant for buf to be an array of chars:
char buf[1024];
I was able to figure out what was going wrong, I did have to change the buf array to an array of chars, but I had some misconceptions on how read was working. I though that read() was reading bytes from the file and storing it into a temp array, so I thought I needed to use write() to write the information from a temp array into the array that I specified. In actuality, read() read the specified file and stored its contents directly into my char buf [1024] array, so the call to write() was actually overwriting all the information read() had read from the specified file, and stored into the char buf [1024] array.
Thank you all for the reply's, I have only posted on here 1 other time, so I am still trying to figure out how to explain the issues I am encountering with less ambiguity.

Compare each line from two different files and print the lines that are different in C

Supposing that I have two files like this:
file1.txt
john
is
the new
guy
file2.txt
man
the old
is
rick
cat
dog
I'd like to compare first line from file1 with all the lines from file2 and verify if it exist. If not, go two the second line from file1 and compare it with all the lines from file2.. and so on until eof is reached by file1.
The output that I expect is:
john
the new
guy
How I thought this should be done:
read file1 and file2
create a function which returns the line number of each of them
take the first line from file1 and compare it to all the lines from file2
do this until all the lines from file1 are wasted
Now, I don't know what I'm doing wrong, but I don't get the result that I expect:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
Could someone please point me into the right direction ? For testing purposes I created a counter at the end which was a part of a small debug. There should be the print() function
As per #chux answer I got the following simplified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
The above code is giving me the following output, which is not what is expected:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
Problems with OP's code
Imprecise definition of line.
Excessive recalculation
Fuzzy determination of the number of lines in a file.
Unlike string, which has a precise definition in C, reading a line is not so well defined. The primary specificity issue: does a line contain the trailing '\n'. If the first answer is Yes, then does the last text in a file after a '\n' constitute a line? (Excessively long lines are another issue, but let us not deal with that today.)
Thus possibly some lines end with '\n' and others do not, fooling strcmp("dog", "dog\n").
The easiest solution is to read a line until either 1) a '\n' is encountered, 2) EOF occurs or 3) line buffer is full. Then after getting a line, lop off the potential trailing '\n'.
Now all lines code subsequently works with have no '\n'.
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP's loop is incredible wasteful. Consider a file with 1000 lines. Code will loop, calling 1000 times countlines() (each countlines() call reads 1000 lines) times when one countlines() call would suffice.
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
There really is no need to count the line anyways, just continue until EOF (fgets() returns NULL). So no need to fix its fuzzy definition. (fuzzy-ness concerns same issues as #1)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
Other simplifications possible - for another day.
FWIW, following counts lines of text allowing the last line in the file to optionally end with a '\n'.
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
Note that this function may get a different result that fgets() calls. Consider a file of one line of 150 characters. fgets(..., 100,...) will report 2 lines. FileLineCount() reports 1.
[Edit] Updated code to conform to OP functionality.
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
This program prints the diff of two files file1.txt and file2.txt.
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
You already have a very good answer (and always will from chux), but here is a slightly different approach to the problem. It uses automatic storage to reading file2 into an array of strings and then compares each line in file1 against every line in file2 to determine whether it is unique. You can easily convert the code to dynamically allocate memory, but for sake of complexity that was omitted:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
Look over the code and let me know if you have any questions.
Example Use/Output
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.

Reading a file word by word with no ending chars

I have this small program in C that reads through a file a compares word by word,
how can I assure that words like "this," won't be read as a word? I would like it to read as "this"
int main(int argc, char *argv[]) {
if(argc != 3)
{
printf("Usage: ./sw <word> <filename> \n");
exit(1);
}
char* word = argv[1];
const char* filename = argv[2];
FILE* file = fopen(filename, "r");
if(file == NULL)
{
printf("Could not open file\n");
exit(1);
}
//Assuming one word can not have more than 250 chars
char w[250], check_eof;
do
{
check_eof = fscanf(file, "%s", w);
if(strcmp(word, w) == 0)
{
printf("W : %s \n", w);
}
} while(check_eof != EOF);
fclose(file);
return 0;
}
You can check if a char belongs to a word like this
int c = fgetc(file);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
// c belongs to a word
word[n++] = c;
} else {
// end of word
if (strncmp(word, w, n) == 0) {
// word and w match!
}
}
If you #include <ctype.h>, then you can call isalpha(c) instead to test it.
In the code below, I use isalpha() and I copy the result string in a new buffer named res. However, this procedure can be done in-place, but I'll leave now for the sake of simplicity.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h> // for isalpha()
int main(int argc, char *argv[]) {
char* word = "this";
const char* filename = "test.txt";
FILE* file = fopen(filename, "r");
if(file == NULL)
{
printf("Could not open file\n");
exit(1);
}
//Assuming one word can not have more than 250 chars
// ATTENTION, it is 249 chars, do NOT forget of the null terminator
char w[250], res[250];
int check_eof; // should be of type int, for EOF checking
do
{
check_eof = fscanf(file, "%s", w);
// what if 'word' appears as the last word
// in the file? You should check for eof
// right after fscanf()
if(check_eof == EOF)
break;
int i = 0, j = 0;
while (w[i]) // parse what we read
{
if (isalpha(w[i]))
res[j++] = w[i]; // keep only the alphabetic chars
i++;
}
res[j] = '\0'; // it should be a null terminated string
if(strcmp(word, res) == 0) // compare to 'res' now
{
printf("W : %s \n", res);
}
} while(1); // the terminating step is inside the body now
fclose(file);
return 0;
}

Resources