How to use strstr() function? - c

I have two files blacklist.txt and email.txt. The blacklist.txt contains some domain names. The email.txt also contains a few domain names. I have to compare both files and find the domain names of blacklist.txt into email.txt using the strstr() function. Following is the code I have written. The problem with this code is it returns me the output NULL instead of the matched text/domain name.
#include <stdio.h>
#include <string.h>
#define MAXCHAR 1000
int main() {
FILE *fp, *fp1;
char str[MAXCHAR];
char str2[MAXCHAR];
char *result;
fp = fopen("blacklist.txt", "r");
fp1 = fopen("email.txt", "r");
if (fp == NULL || fp1 == NULL) {
printf("\n Cannot open one of the files %s %s \n", fp, fp1);
//return = 0;
if (fp != NULL) fclose(fp);
if (fp1 != NULL) fclose(fp1);
}
while (fgets(str, MAXCHAR, fp) != NULL || fgets(str2, MAXCHAR, fp1) != NULL)
//printf("%s, %s", str,str2);
fclose(fp);
fclose(fp1);
result = strstr(str, str2);
printf("The substring starting from the given string: %s", result);
return 0;
}

Here are some remarks on your code:
printing the error message has undefined behavior because you pass the FILE* pointers instead of the file names.
your program has undefined behavior because the body of the while loop is missing.
Since one cannot assume that both files be sorted, each line from blacklist.txt should be tested against all lines in email.txt.
if we can assume that both lines end with a newline, a match with strstr() means the line from the second file s a suffix of the line from the first file. It is a domain match if the return value is the start of the buffer and it is a match for a subdomain if the previous character is a ..
Here is a modified version:
#include <stdio.h>
#include <string.h>
#define MAXCHAR 1000
int main() {
FILE *fp, *fp2;
char str[MAXCHAR];
char str2[MAXCHAR];
// open and check blacklist.txt
fp = fopen("blacklist.txt", "r");
if (fp == NULL) {
printf("Cannot open %s\n", "blacklist.txt");
return 1;
}
// open and check email.txt
fp2 = fopen("email.txt", "r");
if (fp2 == NULL) {
printf("Cannot open %s\n", "email.txt");
fclose(fp);
return 1;
}
// for each line in blacklist.txt
while (fgets(str, MAXCHAR, fp) != NULL) {
// restart from the beginning of email.txt
rewind(fp2);
// for each line of email.txt
while (fgets(str2, MAXCHAR, fp2) != NULL) {
// check for a domain match
char *p = strstr(str, str2);
if (p != NULL && (p == str || p[-1] == '.')) {
// compute the length of the domains (excluding the newline)
int n = strcspn(str, "\n");
int n2 = strcspn(str2, "\n");
// output the message with the matching domains
printf("domain match on %.*s for %.*s\n", n2, str2, n, str);
}
}
}
fclose(fp);
fclose(fp2);
return 0;
}

Related

Running is fine but rename() does not work

Simple function code to delete a line from a text file by making a temporary text file that will store the new content once the line has been deleted and replacing the old Storage.txt file with the temporary file.
The delete() function works but my only problem seems to be the rename() function that seemingly won't do as intended.
THE CODE
void delete() {
struct task task;
FILE *fp;
char str[100];
char ch;
int delete_line = 0;
fp = fopen("Storage.txt", "r");
if (fp == NULL) {
printf("Error opening file");
fopen("Storage.txt", "w");
exit (1);
}
printf("\n\n\nAll Tasks\n");
printf("----------\n\n");
do {
ch = fgetc(fp);
printf("%c", ch);
} while (ch != EOF);
fclose(fp);
int line_no,ret;
char filename[] = "Storage.txt";
char newname[] = "temp.txt";
FILE *file, *temp;
file = fopen("Storage.txt", "r");
temp = fopen("temp.txt", "w");
printf("Select Line to delete: ");
scanf("d", &delete_line);
getchar();
temp = fopen("temp.txt", "w");
while (fgets(str, 99, fp) != NULL) {
line_no++;
if (line_no != delete_line) {
fputs(str, temp);
}
}
fclose(file);
fclose(temp);
remove(filename);
ret = rename(newname, filename);
if (ret == 0) {
printf("File renamed successfully");
} else {
printf("Error: unable to rename the file");
}
}
There are some problems in the code:
ch must be defined with type int to detect EOF reliably.
the do/while loop to read the file contents outputs the EOF indicator before testing it. You should use while ((ch = fgetc(fp)) != EOF) putchar(ch);
the identifier delete should be avoided to avoid confusing C++ programmers, use delete_line instead.
you should test for failure of fopen and remove and display the cause of the error.
if opening the file for reading fails, why do you create the file with fopen("Storage.txt", "w") ?
file temp.txt is open twice, which may prevent the rename operation on legacy systems.
line_no is not initialized. It should be initialized to 1 if lines are numbered starting at 1.
reading lines into an array is not reliable for this task as lines longer than 99 bytes will be counted more than once.
Here is a modified version:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void delete_line() {
const char *filename = "Storage.txt";
const char *tempname = "temp.txt";
int ch;
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Cannot open file %s: %s\n", filename, strerror(errno));
exit(1);
}
printf("\n\n\nAll Tasks\n");
printf("----------\n\n");
while ((ch = getc(fp)) != EOF) {
putchar(ch);
}
fclose(fp);
int delete_line = 0;
printf("Select Line to delete: ");
if (scanf("d", &delete_line) != 1) {
fprintf(stderr, "invalid or missing input\n");
exit(1);
}
// read and discard the rest of the user input line
while ((ch = getchar()) != EOF && c != '\n')
continue;
FILE *file = fopen(filename, "r");
if (file == NULL) {
fprintf(stderr, "Cannot open file %s: %s\n", filename, strerror(errno));
exit(1);
}
FILE *temp = fopen(tempname, "w");
if (temp == NULL) {
fprintf(stderr, "Cannot open file %s: %s\n", tempname, strerror(errno));
fclose(file);
exit(1);
}
int line_no = 1;
while ((ch = getc(file)) != EOF) {
if (line_no != delete_line)
putc(ch, temp);
if (ch == '\n')
line_no++;
}
fclose(file);
fclose(temp);
if (remove(filename)) {
fprintf(stderr, "Cannot remove %s: %s\n", filename, strerror(errno));
} else {
if (rename(tempname, filename)) {
fprintf(stderr, "Cannot rename %s as %s: %s\n",
tempname, filename, strerror(errno));
}
}
}
Your code opens the "temp.txt" file twice:
temp = fopen("temp.txt", "w");
...
temp = fopen("temp.txt", "w");
And closes it once. That will leave one open file descriptor to the file, untill the program exits.
remove() uses unlink() for deleting files. The man page of unlink() says:
If the name was the last link to a file but any processes still have
the file open the file will remain in existence until the last file
descriptor referring to it is closed.
Ensure that all file descriptors are closed when not needed anymore.
The rename may fail, if file of oldpath or newpath is still open.
temp = fopen("temp.txt", "w"); Call it twice
The two main bugs here are:
1.
scanf("d", ...) instead of
scanf("%d", ...)
scanf() needs a format string to know how to parse the input, just like printf() (the f is for format) needs it to know how to construct the output; and their format string syntax is almost the same.
2.
Unintialized line_no, meaning that it's not guaranteed to start at 0/1, thus it might not ever be equal to delete_line, and will not delete the line.

How to print the text from a file if the line number is given?

I want to give input as line number and get output as the corresponding text for that line number in a text file.
Sample text file:
Hi this is Stefen
Hi How are you
Example input:
Enter the line number:2
Expected Output:
Hi How are you
My program is:
#include <stdio.h>
#include <stdlib.h>
int main() {
FILE *fp;
fp = fopen("sample.txt", "r");
if (fp == NULL) {
perror("Unable to open the file\n");
exit(1);
}
char buf[256];
while (fgets(buf, sizeof(buf), fp) != NULL) {
printf("%s\n", buf);
print("~~~~\n");
}
fclose(fp);
return 0;
}
Output I got:(The entire file with the separator ~~~~ below each line)
Hi this is Stefen
~~~~
Hi How are you
~~~~
Can anyone please tell me how to do this?
As pmg suggests, would you please try the following:
#include <stdio.h>
#include <stdlib.h>
#define INFILE "sample.txt"
int main()
{
FILE *fp;
char buf[BUFSIZ];
int count = 0, n;
fp = fopen(INFILE, "r");
if (fp == NULL) {
perror(INFILE);
exit(1);
}
printf("Enter the line number: ");
fgets(buf, sizeof buf, stdin);
n = (int)strtol(buf, (char **)NULL, 10);
while (fgets(buf, sizeof buf , fp) != NULL){
if (++count == n) {
printf("%s", buf);
break;
}
}
fclose(fp);
return EXIT_SUCCESS;
}
Best to use a second file
check if you're at \n that means new line and increment a variable like "line"
printf(" \n Enter line number of the line to be deleted:");
scanf("%d", &delete_line);
//open new file in write mode
ptr2 = fopen("c:\\CTEMP\\newfile.txt", "w");
if(ptr2==NULL)
printf("second error opening newfile");
while (!feof(ptr1))
{
ch = fgetc(ptr1);
if (ch == '\n')
{
temp++;
}
//except the line to be deleted
if (temp != delete_line)
{
//copy all lines in file newfile.c
fputc(ch, ptr2);
}
}
fclose(ptr1);
fclose(ptr2);
"detele_line" variable is for the user to inter.
The easiest way is using array to save the lines, then print the certain line.
#include <stdio.h>
#define M 10010
#define N 256
char buf[M][N];
int main(){
FILE *file;
char fileName[50] = "sample.txt";
file = fopen(fileName, "r");
if(file == NULL)
return 1;
int n = 0;
while(fgets(buf[n], N, file) != NULL){
n++;
}
fclose(file);
int i, x;
printf("Example input:\nEnter the line number:");
scanf("%d", &x);
printf("Expected Output:\n%s", buf[x-1]);
return 0;
}

Strcmp not comparing strings from argv

** Updated 26/10 -> First of all thank you all for you help, I am getting closer now, I need more work and studying but I really appreciate you are helping me a lot :-)
Still don't know why the first "rain" word in the input.txt file is not getting the positive output from strcmp, and from the cmd I can see the "<" bracket don't appear except for the LAST line which is the line that works.
Also checked the highlighted response from Removing trailing newline character from fgets() input
and even if I change the code to the following:
while( fgets (line, sizeof line, fp)!=NULL ) {
/* remove \n from at the end of the str buffer*/
char * pos;
/*
if ((pos = strchr(line, '\n')) != NULL)
*pos = '\0';
*/
line[strcspn(line, "\n")] = 0;
I get the same result as if I use the if block instead. Maybe I'm getting extra \0 which might be the case. Anyone has a link where I can read about the delimiters I just used, or a nice reference of a debugger, etc. ... which I will have a look as soon as I come here? Thank you so much in advance!
read5.c version: Now from that input.txt file, it had an extra space on the last "rain" word, I removed the space, and it was able to find and get that last word compare as a true result, running in the strcmp if block. but that was the only string that was a true positive result from that if block.
on the cmd I can see:
$./read5 input.txt rain output.txt sun
>Maria
>rain
>manel
>Bla bla
<rain>
Found it! rain
On the output.txt it becomes:
Maria
rain
manel
Bla bla
sun
read5.c
#include <stdio.h>
#include <string.h>
/**
* Compile program:
* gcc read3.c -o read3
*
*/
int main (int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[246];
if (argc <= 4){
printf(">Missing arguments on the command line.\n");
printf(">Be sure you run the program as\n\"./read3 input.txt compare outout.txt replace\"\n\n");
}
/* opening file for reading */
fp = fopen(argv[1] , "r");
if(fp == NULL){
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if(fo == NULL){
perror("Error opening output file");
return 1; //TODO check if: return 1 because it was expected, right?
}
replace = argv[4];
/*
printf(); made to test version 2
//printf("We are going to compare %s\n", compare);
//printf("We are going to replace it with %s\n", replace);
*/
while( fgets (line, sizeof line, fp)!=NULL ) {
/* remove \n from at the end of the str buffer*/
char * pos;
if ((pos = strchr(line, '\n')) != NULL)
*pos = '\0';
/* print str enclosed in <> so we can see what str actually contains */
//printf("Inside the loop, got the string: %s\n", line);
//printing the strings with defined delimiters
printf("<%s>\n", line);
if(strcmp(compare, line) == 0){
printf("Found it! %s \n", line);
fprintf(fo, "%s\n", replace);
}
else{
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
First question with no edits: 25/10
I need to make a program that is run like this:
./read2 input.txt rain output.txt sun
It reads the input.txt, searches for rain string and if finds it, replaces it with sun string and outputs all the text from input.txt with the replacements to the output.txt.
But with the code that I have so far, the strcmp is not comparing the strings I want, maybe it has the extra space that I get on the command line, I don't know... for now what is doing is copying everything from input.txt to output.txt... It's running the else block always...
Read2.c:
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv) {
FILE *fp, *fo;
char str[60];
//char* token;
/* opening file for reading */
fp = fopen(argv[1], "r");
char *compare = argv[2];
fo = fopen(argv[3], "w+");
char *replace = argv[4];
if (fp == NULL) {
perror("Error opening file");
return(-1);
}
//printf("We are going to compare %s\n", compare);
//printf("We are going to replace it with %s\n", replace);
while (fgets(str, 60, fp) != NULL) {
/* writing content to stdout */
//Take the \n out
//token = strtok(str, "\n");
printf("Inside the loop, got the string: %s\n", str);
if (strcmp(compare, str) == 0) {
//puts(str);
printf("Found it! %s \n", str);
fprintf(fo, "%s", replace);
} else {
fprintf(fo, "%s", str);
}
}
fclose(fp);
return(0);
}
input.txt:
Maria
rain
manel
Bla bla
rain
Ouput.txt becomes exactly as input.txt and before it was empty, so the code is working, except the if block that tests with strcmp.
The problem is the \n at the end of the str buffer. fgets adds the \n at end end of the line it reads, you need to get rid of it before comparing.
This is what you need:
while (fgets(str, 60, fp) != NULL) {
/* remove \n from at the end of the str buffer*/
char *pos;
if ((pos = strchr(str, '\n')) != NULL)
*pos = '\0';
/* print str enclosed in <> so we can see what str actually contains */
printf("Inside the loop, got the string: <%s>\n", str);
if (strcmp(compare, str) == 0) {
printf("Found it! %s\n", str);
fprintf(fo, "%s\n", replace);
}
else {
fprintf(fo, "%s\n", str);
}
}
Look at the comments in the code for explanations.
read.c
#include <stdio.h>
#include <string.h>
/**
* How to compile program:
* gcc read.c -o read
*
* How to run the program:
* .> ./read input.txt rainy output.txt sunny
* (On Windows MinGW compiler, simply:
* .> read input.txt rainy output.txt sunny - without ./)
*
*/
int main (int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[246];
if (argc <= 4){
printf(">Missing arguments on the command line.\n");
printf(">Be sure you run the program as\n\"./read input.txt compare outout.txt replace\"\n\n");
}
/* Opening files for reading */
fp = fopen(argv[1] , "r");
if(fp == NULL){
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if(fo == NULL){
perror("Error opening output file");
return 1;
}
replace = argv[4];
while( fgets (line, (sizeof line), fp)!=NULL ) {
line[strcspn(line, "\n")] = 0;
if(strcmp(compare, line) == 0){
printf("Found it! %s \n", line);
fprintf(fo, "%s\n", replace);
}
else{
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
/*
Important info
strcspn ::
Locate first occurrence of character in string,
after locating the first occurrence of \n, replaces it by 0.
Sources::
https://stackoverflow.com/questions/2693776/removing-trailing-newline-character-from-fgets-input/28462221#28462221
Used to debug:
.>printf("1st: Reads input.txt, removes '\\n' from fgets, and prints it \n");
.>printf("2nd: Compares each line with 'rainy' \n");
.>printf("<%s>\n", line);
*/
input.txt
cloudy
rainy
chilly
rainy
rainy
Your approach fails because the lines read from the input file contain a trailing newline '\n' that makes the comparison return non zero.
You can strip the newline before comparing with the search string.
Note that there are other problems:
you should verify that enough command line arguments have been passed by testing argc > 4.
there is no need to open the output file in update mode "w+", "w" is simpler and better.
60 bytes is a bit small for the line array, limiting the longest line handled correctly to 58 bytes.
Here is an improved version:
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv) {
FILE *fp, *fo;
char *compare, *replace;
char line[256];
if (argc <= 4) {
printf("missing command line arguments\n");
return 1;
}
fp = fopen(argv[1], "r");
if (fp == NULL) {
perror("Error opening input file");
return 1;
}
compare = argv[2];
fo = fopen(argv[3], "w");
if (fo == NULL) {
perror("Error opening output file");
return 1;
}
replace = argv[4];
while (fgets(line, sizeof line, fp) != NULL) {
line[strcspn(line, "\n")] = '\0';
if (strcmp(line, compare) == 0) {
printf("fount it!);
fprintf(fo, "%s\n", replace);
} else {
fprintf(fo, "%s\n", line);
}
}
fclose(fp);
fclose(fo);
return 0;
}
Note that long lines will be broken into chunks that fit in the line array, so there may be false positives with the above naive approach.
You can remove this limitation completely with this inner loop:
int c;
int pos = 0;
int cmplen = strlen(compare);
for (;;) {
c = getc(fp);
if (c == '\n' || c == EOF) {
if (pos == cmplen) {
fprintf(fo, "%s", replace);
} else
if (pos > 0) {
fprintf(fo, "%*s", pos, compare);
}
pos = 0;
if (c == EOF)
break;
} else {
if (pos >= 0) {
if (compare[pos] == (char)c) {
pos++;
continue;
}
if (pos > 0) {
fprintf(fo, "%*s", pos, compare);
}
pos = -1;
}
}
putc(c, fo);
}

Get integer after a character from file

I'm trying to read the number of a txt file like this:
input=20
output=10
hidden=5
....
I tried with this code:
char line[30];
char values[100][20];
int i = 0;
FILE *fp;
fp = fopen("myFile.txt", "r");
if(fp == NULL)
{
printf("cannot open file\n");
return 0;
}
while(fgets(line, sizeof(line), fp) != NULL)
{
sscanf(line, "%[^=]", values[i])
printf("%s\n", values[i]);
i++;
}
fclose(fp);
But I obtain only the first word and never the number after the =.
I get
input
output
etc
instead of
20
10
5
etc
How can I get the number??
This line
sscanf(line, "%[^=]", values[i]);
means "read everything up to, but not including, the = sign into values[i]".
If you are interested in the numeric part after the equal sign, change the call as follows:
sscanf(line, "%*[^=]=%19s", values[i]);
This format line means "read and ignore (because of the asterisk) everything up to, and including, the equal sign. Then read a string of length of up to 19 characters into values[i]".
Demo.
Don't use sscanf() for that, redeclare values to store the integers like
int values[LARGE_CONSTANT_NUMBER];
and after fgets() just use strchr
char *number;
number = strchr(line, '=');
if (number == NULL)
continue;
number += 1;
values[i] = strtol(number, NULL, 10);
you could also use malloc() and realloc() if you wish, to make the values array dynamic.
Try it if you like
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(void)
{
char line[100];
int values[100];
int i;
FILE *fp;
size_t maxIntegers;
fp = fopen("myFile.txt", "r");
if (fp == NULL)
{
perror("cannot open file\n");
return 0;
}
i = 0;
maxIntegers = sizeof(values) / sizeof(values[0]);
while ((fgets(line, sizeof(line), fp) != NULL) && (i < maxIntegers))
{
char *number;
number = strchr(line, '=');
if (number == NULL) /* this line does not contain a `=' */
continue;
values[i++] = strtol(number + 1, NULL, 10);
printf("%d\n", values[i - 1]);
}
fclose(fp);
return 0;
}
with this technique you avoid unecessarily storing the number as a string.

Search for keyword in textual file C

I'm having trouble reading a keyword from file that a user inputs to search for. The first part of the program asks for user input for naming the file. It then asks for sentence input. You can input sentences until you write "END". When you write "END", the appending of sentences to file should stop and the program should ask you for a keyword to search the sentences appended to the newly created textual file. I used 'gets' to ask for a word that will be searched for in the file. The program should find that word in a sentence and print back the whole sentence containing the keyword. The whole code looks like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char fileName[128];
printf("Input your filename (end with .txt):");
gets(fileName);
FILE *filePointer = NULL;
char text1[128];
char word1[128];
filePointer = fopen(fileName, "a");
if(filePointer == NULL)
{
printf("Cannot open file!");
exit(EXIT_FAILURE);
}
else{
printf("Input your sentence: ");
while (fgets(text1, 127, stdin) != NULL && strncmp(text1, "END\n", 5) != 0){
printf("Input your sentence: ");
fprintf(filePointer, "%s", text1);
}
int line_num = 1;
int find_result = 0;
char text2[128];
filePointer = fopen(fileName, "r");
printf("Input keyword you're looking for: ");
gets(word1);
while(fgets(text2, 127, filePointer) != NULL) {
if((strstr(text2, word1)) != NULL) {
printf("A match found on line: %d\n", line_num);
printf("\n%s\n", tekst2);
find_result++;
}
line_num++;
}
if(find_result == 0) {
printf("\nSorry, couldn't find a match.\n");
}
if(filePointer) {
fclose(filePointer);
}
return(0);
}
}
It all works, but the problem is somewhere here:
int line_num = 1;
int find_result = 0;
char text2[128];
filePointer = fopen(fileName, "r");
printf("Input keyword you're looking for: ");
gets(word1);
while(fgets(text2, 127, filePointer) != NULL) {
if((strstr(text2, rijec)) != NULL) {
printf("A match found on line: %d\n", line_num);
printf("\n%s\n", text2);
find_result++;
}
line_num++;
}
I'm new at C programming, so I'm not sure where the flaw is. I know it should work in theory. It doesn't return a result when it clearly should.
You need to fclose() the file after writing, before reopening to read.
if (fclose(filePointer) != 0)
{
fputs("The sky is falling.", stderr);
return 1;
}
filePointer = fopen(fileName, "r");

Resources