grep implementation for multi-word search - c

I wrote a program that implements the grep command. I tried to search by several words. I use the -e WORD option to search by word in the target file, so if the option is present several times in the command line you can search for more words grep will display any line that contains at least one searched word.
I would like you to criticize the implementation of this option (-e WORD). What possible problems may arise?
Something tells me that I could have implemented it even better
I think the treatment of this grep option could have been better written
code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char** argv){
if(argc < 3) {
printf("Usages:\n1: ./grep.o <word> <file>\n2: ./grep.o -e <word> [-e <word>...] <file>\n");
return 1;
}
else {
FILE* fp;
char* line = NULL;
size_t len = 0;
ssize_t read;
int i;
fp = fopen(argv[argc - 1], "r");
if(fp == NULL) {
printf("File doesn't exist or cannot be read!\n");
return 1;
}
if(argc == 3) {
while((read = getline(&line, &len, fp)) != -1) {
if(strstr(line, argv[1]) != NULL)
printf("%s", line);
}
}
else {
for(i = 1; i < argc - 2; i += 2) {
if(strcmp(argv[i],"-e") != 0) {
printf("The option must be \"-e\"!\n");
fclose(fp);
return 1;
}
}
if(i + 1 == argc - 1) {
if(strcmp(argv[i],"-e") != 0) {
printf("The option must be \"-e\"!\n");
}
else {
printf("The option must have a word after it!\n");
}
fclose(fp);
return 1;
}
while((read = getline(&line, &len, fp)) != -1) {
for(i = 1; i < argc - 2; i += 2) {
if(strstr(line, argv[i + 1]) != NULL) {
printf("%s", line);
break;
}
}
}
fclose(fp);
if(line)
free(line);
}
return 0;
}
}

Related

Possible errors when reading larger lines

I noticed a problem with the next line while ((read = getline(&line, &len, fp)) != -1) {: I may get errors if the lines read are too large. Should I read MAXLINE pieces? I think it's a problem if I break the original line into pieces. It is possible to "cut" the fixed line in the middle of the word I am looking for.
For example, if you break the car for sale (you are looking for ) in car fo and r sale. I will not find for in either of the two pieces. Maybe a solution would be for each part read, to read extra len (search_word) characters (without changing the original place where the next piece would have started). Basically I read more r s in the first part, but it guarantees that I will find for
How can I handle possible errors?
The following program is based on the implementation of grep from linux, doing a search for several words
program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char** argv)
{
if (argc < 3)
{
printf("Usages:\n1: ./grep.o <word> <file>\n2: ./grep.o -e <word> [-e "
"<word>...] <file>\n");
return 1;
}
else
{
FILE *fp;
char *line = NULL;
size_t len = 0;
ssize_t read;
int i;
fp = fopen(argv[argc - 1], "r");
if (fp == NULL)
{
printf("File doesn't exist or cannot be read!\n");
return 1;
}
if (argc == 3)
{
while ((read = getline(&line, &len, fp)) != -1)
{
if (strstr(line, argv[1]) != NULL)
printf("%s", line);
}
}
else
{
for (i = 1; i < argc - 2; i += 2)
{
if (strcmp(argv[i], "-e") != 0)
{
printf("The option must be \"-e\"!\n");
fclose(fp);
return 1;
}
}
if (i + 1 == argc - 1)
{
if (strcmp(argv[i], "-e") != 0)
{
printf("The option must be \"-e\"!\n");
}
else
{
printf("The option must have a word after it!\n");
}
fclose(fp);
return 1;
}
while ((read = getline(&line, &len, fp)) != -1)
{
for (i = 1; i < argc - 2; i += 2)
{
if (strstr(line, argv[i + 1]) != NULL)
{
printf("%s", line);
break;
}
}
}
fclose(fp);
if (line)
free(line);
}
return 0;
}
}
The problem you describe is not present in your code precisely because you use getline() instead of fgets(). getline() reallocates the line pointer and len as required to read a full line. The only limitation is memory, which is unlikely to pose a problem on current systems.
Note however that you should close the file and free line outside of the else branch to avoid a memory leak.

Comparing two txt files in c

I'm trying to compare 2 text files and print the first line where they differ but I'm using a buffer of 500 in the fgets() command and I think I'm wasting space.
How can I make the same program if I don't know the length of the line?
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char const *argv[])
{
FILE *fp1, *fp2;
int nLine = 1;
char l1[500], l2[500];
system("clear");
if (argc < 3)
{
printf("Usage: %s <file1.txt> <file2.txt>\n",argv[0]);
exit(1);
}
if ((fp1 = fopen(argv[1],"r")) == NULL){
printf("Can't open file: %s\n", argv[1]);
exit(1);
}
if ((fp2 = fopen(argv[2],"r")) == NULL){
printf("Can't open file: %s\n", argv[2]);
exit(1);
}
fgets(l1,500,fp1);
fgets(l2,500,fp2);
while ((l1 != 0) && (l2 != 0)){
if(strcmp(l1,l2) != 0){
printf("Line number: %d\n", nLine);
printf("%s", l1);
printf("%s\n", l2);
exit(1);
} else {
fgets(l1,500,fp1);
fgets(l2,500,fp2);
nLine++;
}
}
return 0;
}
If you do not want to "waste space", remember that the data are in file memory. Read 1 character at time. When you find a difference, just seek to that location of the previous line feed and report the following lines.
long index = 0;
long index_lf = 0;
int c1,c2;
// read until a difference or done
while ((c1 = fgetc(fp1)) == (c2 = fgetc(fp2)) && (c1 != EOF)) {
index++;
if (c1 == '\n') index_lf = index;
}
if (c1 == c2) {
puts("same");
} else {
puts("differ");
fseek(fp1, index_lf, SEEK_SET);
fseek(fp2, index_lf, SEEK_SET);
// read and print each file until a following \n or EOF occurs.
// TBD code for OP
}
[Edit] Some improvements to cope with various issues: mis-match on last byte, files opened in different modes, error handling, etc.
long offset1 = ftell(fp1);;
long offset2 = ftell(fp2);;
int c1,c2;
// read until a difference or done
while ((c1 = fgetc(fp1)) == (c2 = fgetc(fp2)) && (c1 != EOF)) {
if (c1 == '\n') {
offset1 = ftell(fp1);
offset2 = ftell(fp2);
}
}
if (offset1 == -1 || offset2 == -1 || ferror(fp1) || ferror(fp2)) {
puts("problem");
} else if (c1 == c2) {
puts("same");
} else {
puts("differ");
fseek(fp1, offset1, SEEK_SET);
fseek(fp2, offset2, SEEK_SET);
// read and print each file until a following \n or EOF occurs.
// TBD code for OP
}

Xcode (C) "Check dependencies" and "Error: Executable doesn't exist"

I'm doing a school project in C using xcode. The task is to create a command line application that checks if and how many times a word appears in a text file. So far I've just created the first step of the code, I have set the first argue argument to "hej" even though Im not using the word yet, and because of the first filename is not entered and therefore NULL, I have set it to be an existing file called "hej.txt", in order to test my code as I go.
This bit worked before:
#include "stdio.h"
#include "string.h"
struct fileSearch
{
char *inFile;
char *outFile;
char *searchWord;
};
int main(int argc, char *argv[])
{
int index = 0;
struct fileSearch f;
f.searchWord = argv[1];
FILE *searchFile;
//For-loop to assign filenames
for(index = 0; index < argc ; ++index)
{
if((strcmp(argv[index], "-i") == 0) && argv[index] != NULL && strcmp(argv[index + 1], "-o") != 0)
{
f.inFile = argv[index + 1];
}
if((strcmp(argv[index], "-o") == 0) && argv[index] != NULL && strcmp(argv[index + 1], "-c") != 0)
{
f.outFile = argv[index + 1];
}
}
if (f.searchWord == NULL)
{
printf("You didnt enter a search word");
}
if (f.inFile == NULL)
{
f.inFile = "hej.txt";
}
if (f.outFile == NULL)
{
f.outFile = "stdout.txt";
}
printf("%s & %s & %s", f.searchWord, f.inFile, f.outFile);
return 0;
}
It then printed out the search word, the input filename and the output filename.
Then I added the next step of the code where it would open the text file "hej.txt" and then read the strings and print them.
#include "stdio.h"
#include "string.h"
struct fileSearch
{
char *inFile;
char *outFile;
char *searchWord;
};
int main(int argc, char *argv[])
{
int index = 0;
struct fileSearch f;
f.searchWord = argv[1];
FILE *searchFile;
//For-loop to assign filenames
for(index = 0; index < argc ; ++index)
{
if((strcmp(argv[index], "-i") == 0) && argv[index] != NULL && strcmp(argv[index + 1], "-o") != 0)
{
f.inFile = argv[index + 1];
}
if((strcmp(argv[index], "-o") == 0) && argv[index] != NULL && strcmp(argv[index + 1], "-c") != 0)
{
f.outFile = argv[index + 1];
}
}
if (f.searchWord == NULL)
{
printf("You didnt enter a search word");
}
if (f.inFile == NULL)
{
f.inFile = "hej.txt";
}
if (f.outFile == NULL)
{
f.outFile = "stdout.txt";
}
printf("%s & %s & %s", f.searchWord, f.inFile, f.outFile);
//Open file and read
char str[60];
searchFile = fopen(f.inFile, "r");
if(searchFile = fopen(f.inFile, "r") == NULL)
{
printf("Error: There is no file with that name.");
return -1;
}
if(searchFile == NULL)
{
printf("The file is empty");
return -1;
}
while(fgets(str, 60, searchFile) != NULL)
{
puts(str);
}
fclose(searchFile);
return 0;
}
However now it builds the code successfully but when I try to run it, I get this error message:
"error: executable doesn't exist: '/Users/Slattegard/Library/Developer/Xcode/DerivedData/Coursework-chwsqczjjgxcyegnxgysiaigslvq/Build/Products/Debug/Coursework'
error: failed to launch '/Users/Slattegard/Library/Developer/Xcode/DerivedData/Coursework-chwsqczjjgxcyegnxgysiaigslvq/Build/Products/Debug/Coursework'"
and the Issue Navigator gives me this message:
"Check dependencies
warning: no rule to process file '/Users/Slattegard/Documents/Coursework/Coursework/Find.c' of type sourcecode.c for architecture x86_64"
Anyone know where I am going wrong?

Program to print and sum numbers in a text file

I want to write a program which print all numbers found in a file and then add them up. I have two problems:
How to add up the numbers I've printed?
Why in output_file do I have so many commas:
Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define CHUNK 12
char *getWord(FILE *infile);
void clean(char *dirty);
char *getWord(FILE *infile)
{
char *word, *word2;
int length, cursor, c;
word = (char*)malloc(sizeof(char)*CHUNK);
if(word == NULL) return NULL;
length = CHUNK;
cursor = 0;
while(!isspace(c = getc(infile)) && !feof(infile))
{
word[cursor] = c;
cursor++;
if(cursor >= length)
{
length += CHUNK;
word2 = (char*)realloc(word, cursor);
if(word2 == NULL)
{
free(word2);
return NULL;
}
else
{
word = word2;
}
}
}
word[cursor] = '\0';
return word;
}
void clean(char *dirty)
{
int i = 0, j = 0;
char *temp;
temp = strdup(dirty);
while(i < strlen(temp))
{
if(isdigit(temp[i]))
{
dirty[j] = temp[i];
j++;
}
i++;
}
dirty[j] = '\0';
free(temp);
}
int main(int argc, char *argv[])
{
char *word;
FILE *infile, *outfile;
if(argc != 3)
{
printf("Missing argument!\n");
exit(1);
}
infile = fopen(argv[1], "r");
if(infile != NULL)
{
outfile = fopen(argv[2], "w");
if(outfile == NULL)
{
printf("Error, cannot open the outfile!\n");
abort();
}
else
{
while(!feof(infile))
{
word = getWord(infile);
if(word == NULL)
{
free(word);
abort();
}
clean(word);
fputs(word, outfile);
fputs(",", outfile);
free(word);
}
}
}
else
{
printf("Error, cannot open the outfile!\n");
abort();
}
fclose(infile);
fclose(outfile);
return 0;
}
infile:
You are getting , because of this -
fputs(",", outfile);
It is related in structure to the echo unix command. The core of the program could be simplified to something along the following lines:
int c, need_comma = 0;
while ((c = fgetc(infile)) != EOF) {
if (isdigit(c)) {
fputc(c, outfile);
need_comma = 1;
}
else {
if (need_comma == 1) {
fputc(',', outfile);
need_comma = 0;
}
}
}
this removes the need for getWord and clean functions.
This is just the printing part. the intermediate file is in CSV format,
which is structured and easy to parse and add the numbers (and print the
result to another file).

unable to clear string during/after a while loop in c

I have a code that scans all the files in a directory for targeted words, and prints them out into a new file. The problem right now is after the while loop reads a file and stores a variable into the string (ex. customer), if the next file being read does not have the targeted word, it still displays the result stored in the string from the previous file. My goal is to make it display "N/A" if the current file does not have the target word.
I have tried a few ways to clear the string at the end or beginning of the while loop, but none of them work most of them just gives me a coredump error. Running out of ideas, any help would be much appreciated!
Code (shortened for easier reading):
int main(int argc, char** argv)
{
char directory[100];
char buff[100];
char delims[] = " :=";
char* result = NULL;
char* customer;
char* device;
char* buffer;
int i = 0;
DIR* FD;
struct dirent* in_file;
int c = 0;
printf("Enter directory:");
scanf("%s",directory);
FILE* ft = fopen("workorderlist.csv", "w"); /* Open file to write to*/
if (ft == NULL)
{
puts("Cannot open target file");
exit(1);
}
fprintf (ft, "Work Order,Customer,Device,Test_Prog,Software,DUT_board_id,Corl box\n");
/* Open Directory */
if (NULL == (FD = opendir(directory)))
{
puts("Cannot open directory");
return 1;
}
while ((in_file = readdir(FD)))
{
if (!strcmp (in_file->d_name, "."))
{
continue;
}
if (!strcmp (in_file->d_name, ".."))
{
continue;
}
/* Open files to read from */
buffer = (char*)malloc(100);
sprintf(buffer, "%s/%s", directory, in_file->d_name);
size_t len = strlen(buffer);
if (len >= 4 && memcmp(buffer + len - 4, ".wor", 4) == 0) /* checks if file ends with .wor */
{
FILE* fs = fopen(buffer, "r"); /* open file to read */
if (fs == NULL)
{
puts("Cannot open source file");
return 1;
}
/* Scanning each file for targeted words: */
while (fgets(buff, 100, fs) != NULL)
{
result = strtok( buff, delims );
while (result != NULL)
{
if ((strcmp(result, "Customer") == 0))
{
result = strtok(NULL,delims);
customer = (char*)malloc((strlen(result)+1)*sizeof(char));
strcpy(customer, result);
for (i = 0; i < strlen(customer) + 1; i++)
{
if (customer[i] == '\n')
{
break;
}
}
customer[i] = ' ';
}
if (strcmp(result, "device") == 0)
{
result = strtok(NULL, delims);
device = (char*)malloc((strlen(result) + 1) * sizeof(char));
strcpy(device, result);
for (i = 0; i < strlen(device) + 1; i++)
{
if(device[i] == '\n')
{
break;
}
}
device[i] = ' ';
}
result = strtok(NULL,delims);
}
}
if (customer == '\0')
{
customer = "N/A";
}
if (device == '\0')
{
device = "N/A";
}
fprintf(ft, "%s,%s,%s,%s,%s,%s,%s\n",
in_file->d_name, customer, device, testprog,
software, dutboardid, corlbox);
printf(in_file->d_name);
printf("\n");
fclose (fs) ;
c++;
}
}
printf("Total Workorders Found: %d (Info saved to workorderlist.csv)\n", c);
fclose(ft);
return 0;
}
First at all, customer/device are strings. You should not be doing == for it comparison. You can, for example, compare the first char of the string: device[0] == '\0';
You should do string initialization before the loop starts.
You can achieve this by using strcpy with a known value or any other string manipulation function. The value that you use to initialize the string before the loop is the one you gonna test with strcmp or similar later.
Is like with ints or any other C data type, but you need manipulation functions instead.
By the way, haven't you posted your read file loop in a question here too?
Hope this helps.

Resources