Find number of occurrences for the substring in a string using C programming - c

I am writing a program in C that reads a text file containing a string, counts the occurrences of the substring "GLROX", and prints "Sequence Found" for each occurrence. The file "inputGLORX.txt" contains the following string:
GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX
But I am getting weird results. It would be great if an expert in C programming could help me solve this. Thanks in advance.
#include <stdio.h>
#include <conio.h>
#include <string.h>
#define NUMBER_OF_STRINGS 40
#define MAX_STRING_SIZE 7
/* Report one match of the search sequence on standard output. */
void seqFound()
{
    fputs("Sequence Found\n", stdout);
}
/*
 * Question code, kept exactly as posted because the answers refer to these lines.
 * Intent: read inputGLORX.txt into buff, count the occurrences of "GLROX" and
 * print "Sequence Found" once per occurrence.
 */
int main()
{
FILE *fp;
char buff[1000];
char strptrArr[NUMBER_OF_STRINGS] [MAX_STRING_SIZE]; // declared but never used below
const char *search = "GLROX";
fp = fopen("D:/CandC++/inputGLORX.txt", "r");
if(fp==NULL)
printf("It is a null pointer"); // NOTE(review): execution continues even though fp is NULL
while(!feof(fp))
{
//fscanf(fp, "%s", buff);
fgets(buff, 1000,fp); // every pass overwrites buff, so only the last read survives the loop
}
int len = strlen(buff);
printf("length is %d\n",len);
int count = 0;
char *store;
while(store = strstr(buff, search)) // always searches from the start of buff
{
printf("substring is %s \n",store);
count++;
search++; // NOTE(review): this shortens the needle ("LROX", "ROX", ...) instead of moving forward in buff
}
printf("count is %d\n",count);
while (count!=0) { // one seqFound() call per counted hit
seqFound();
count--;
}
return 0;
}

As said in the comments, there are at least two problems in the code: your fgets loop keeps only the last line it read (if it reads one at all; in any case, this is not what you want), and you are incrementing the search string instead of the position in buff.
Something like this should fix most of your problems, as long as no line in your file is longer than 999 characters. It will not work properly if your search string contains a newline ('\n') or NUL ('\0') character.
/* Count every (possibly overlapping) occurrence of search, one line of the file at a time. */
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
    /* Each new search resumes one character past the previous hit. */
    for (char *hit = strstr(buff, search); hit != NULL; hit = strstr(hit + 1, search))
    {
        count++;
        printf("%d. %s\n", count, hit);
    }
}
Here is a main for testing. I used argv to provide the input.txt and the search string.
#include <stdio.h>
#include <string.h>
/*
 * Usage: <program> <file> <needle>
 *
 * Prints every (possibly overlapping) occurrence of <needle> in <file>,
 * numbered from 1, followed by the total. Returns -1 when an argument is
 * missing, the needle is empty, or the file cannot be opened; 0 otherwise.
 * A match that straddles two fgets() chunks (lines longer than the buffer)
 * is not detected.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    char buff[1000];
    const char *search;

    if (argc < 3)
        return (-1);
    search = argv[2];
    /* strstr() matches an empty needle at every position, so reject it. */
    if (search[0] == '\0')
        return (-1);
    if ((fp = fopen(argv[1], "r")) == NULL)
        return (-1);

    int count = 0;
    while (fgets(buff, sizeof buff, fp) != NULL)
    {
        char *temp = buff;
        while ((temp = strstr(temp, search)))
        {
            printf("%d. %s\n", count + 1, temp);
            count++;
            temp++; /* step past the start of this hit so overlaps are found too */
        }
    }
    fclose(fp); /* the original never released the stream */
    printf("Match found: %d\n", count);
    return 0;
}

The way you search in buff is wrong, i.e. this code:
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++; // <------- oops: this advances the needle, not the position in buff
}
When you have a hit, you change search, i.e. the string you are looking for. That's not what you want. The search string (aka the needle) shall be the same all the time. Instead you want to move forward in the buffer buff so that you can search in the remainder of the buffer.
That could be something like:
/* Count the (possibly overlapping) occurrences of search in buff and print the total. */
int main()
{
    const char* buff = "GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX";
    const char* search = "GLROX";
    int cnt = 0;

    /* hit walks from match to match; each new search starts one character
       after the beginning of the previous match. */
    for (const char* hit = strstr(buff, search); hit != NULL; hit = strstr(hit + 1, search))
    {
        cnt++;
    }
    printf("Found substring %d times\n", cnt);
    return 0;
}
Output:
Found substring 15 times

Related

C programming: lines of text file to integer array

I want to change my input.txt file to an integer array.
But sadly I keep missing one integer whenever new-line-character is met.
Following is my main()
/* Question code as posted: loads input.txt through readFile() and passes the text to convert(). */
int main(int args, char* argv[]) {
int *val;
char *STRING = readFile();
val = convert(STRING); // neither STRING nor val is freed before returning
return 0;
}
Following is my file input function
/*
 * Question code as posted: appends every line of input.txt to one
 * heap-allocated string and returns it.
 */
char *readFile() {
int count; // NOTE(review): never initialised before the += below
FILE *fp;
fp = fopen("input.txt", "r");
if(fp==NULL) printf("File is NULL!n"); // execution continues with fp == NULL; "!n" was presumably meant to be "!\n"
char* STRING;
char oneLine[255];
STRING = (char*)malloc(255);
assert(STRING!=NULL);
while(1){
fgets(oneLine, 255, fp); // return value ignored, so a failed read leaves the previous line in oneLine
count += strlen(oneLine);
STRING = (char*)realloc(STRING, count+1);
strcat(STRING, oneLine); // NOTE(review): STRING was never given an initial '\0' before the first strcat
if(feof(fp)) break;
}
fclose(fp);
return STRING;
}
Following is my integer array function
/*
 * Question code as posted: splits STRING on spaces and stores atoi() of each
 * token in a freshly allocated int array, which is returned.
 */
int *convert(char *STRING){
int *intarr;
intarr = (int*)malloc(sizeof(int)*16); // room for 16 values only; a is never checked against this
int a=0;
char *ptr = strtok(STRING, " "); // only ' ' separates tokens, so a '\n' stays inside a token
while (ptr != NULL){
intarr[a] = atoi(ptr);
printf("number = %s\tindex = %d\n", ptr, a);
a++;
ptr = strtok(NULL, " ");
}
return intarr;
}
There are many issues.
This is a corrected version of your program, all comments are mine. Minimal error checking is done for brevity. intarr = malloc(sizeof(int) * 16); will be a problem if there are more than 16 numbers in the file, this should be handled somehow, for example by growing intarr with realloc, similar to what you're doing in readFile.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#define MAXLINELENGTH 255 // define a constant rather than hardcoding "255" at several places

/*
 * Read the whole of "input.txt" into one heap-allocated, NUL-terminated string.
 *
 * Returns the string (the caller must free() it), or NULL when the file
 * cannot be opened or memory runs out.
 */
char *readFile() {
    FILE *fp = fopen("input.txt", "r");
    if (fp == NULL)
    {
        printf("File is NULL!\n");
        return NULL; // abort if file could not be opened
    }

    char oneLine[MAXLINELENGTH];
    size_t length = 0; // characters stored so far, not counting the terminator
    char *STRING = malloc(1);
    if (STRING == NULL)
    {
        fclose(fp);
        return NULL;
    }
    STRING[0] = '\0'; // start with an empty string so an empty file yields ""

    while (fgets(oneLine, MAXLINELENGTH, fp) != NULL) // correct usage of fgets
    {
        size_t chunk = strlen(oneLine);
        // Keep the old pointer until realloc has succeeded, otherwise a
        // failure would leak the text collected so far.
        char *grown = realloc(STRING, length + chunk + 1);
        if (grown == NULL)
        {
            free(STRING);
            fclose(fp);
            return NULL;
        }
        STRING = grown;
        // Append at the known end instead of letting strcat rescan the string.
        memcpy(STRING + length, oneLine, chunk + 1);
        length += chunk;
    }
    fclose(fp);
    return STRING;
}
/*
 * Parse the whitespace-separated integers in STRING.
 *
 * STRING is modified in place (strtok writes terminators into it).
 * On return *nbofvalues holds the number of integers found. The result is a
 * heap array the caller must free(); it is NULL (with *nbofvalues == 0) when
 * memory runs out. Each value is also echoed to stdout as it is parsed.
 */
int *convert(char *STRING, int *nbofvalues) { // nbofvalues for returning the number of values
    static const char separators[] = " \t\r\n"; // fgets keeps the '\n', so it must count as a separator
    size_t capacity = 16;
    int count = 0;
    int *intarr = malloc(capacity * sizeof *intarr);

    *nbofvalues = 0;
    if (intarr == NULL)
        return NULL;

    for (char *ptr = strtok(STRING, separators); ptr != NULL; ptr = strtok(NULL, separators)) {
        if ((size_t)count == capacity) {
            // Array is full: double it rather than writing past the end.
            int *grown = realloc(intarr, 2 * capacity * sizeof *intarr);
            if (grown == NULL) {
                free(intarr);
                return NULL;
            }
            intarr = grown;
            capacity *= 2;
        }
        intarr[count] = atoi(ptr);
        printf("number = %s\tindex = %d\n", ptr, count);
        count++;
    }
    *nbofvalues = count; // return number of values
    return intarr;
}
/* Load input.txt, convert its contents to integers and list them with their indices. */
int main(int args, char* argv[]) {
    char *STRING = readFile();
    if (STRING == NULL)
    {
        // nothing could be read, so there is nothing to convert
        printf("readFile() problem\n");
        return 1;
    }

    int nbvalues;
    int *val = convert(STRING, &nbvalues); // nbvalues receives the number of parsed values

    int i = 0;
    while (i < nbvalues)
    {
        printf("%d: %d\n", i, val[i]);
        i++;
    }

    // both buffers are heap allocations handed over by the helpers
    free(val);
    free(STRING);
    return 0;
}
I'm not sure what your requirement is, but this can be simplified a lot because there is no need to read the file into memory and then convert the strings into number. You could convert the numbers on the fly as you read them. And as already mentioned in a comment, calling realloc for each line is inefficient. There is room for more improvements.

C Turning lines of an archive into arrays

I have an archive and I want to turn every line into an array: v[i].data.
However, when I run the code it shows zeros for the arrays.
Is there anything I should change?
Input
1760
02/20/18,11403.7
02/19/18,11225.3
02/18/18,10551.8
02/17/18,11112.7
02/16/18,10233.9
Actual Output
1761
0
Expected Output
1761
02/20/18,11403.7
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
/* One line of the input file, kept as text: at most 19 characters plus the terminator. */
typedef struct{
char data[20];
}vetor;
/*
 * Question code as posted. Intent: read the record count from the first line
 * of the file named by argv[1], then store each following line in v[i].data.
 */
int main(int argc,char *argv[]){
FILE *csv;
if((csv=fopen(argv[1], "r")) == NULL )
{
printf("not found csv\n");
exit(1);
}
long int a=0;
char linha[256];
char *token = NULL;
if(fgets(linha, sizeof(linha), csv)) //counting lines
{
token = strtok(linha, "\n");
a =(1 + atoi(token)); // count from the first line, plus one
}
printf("%d\n", a); // NOTE(review): a is a long int but is printed with %d
rewind(csv); // back to the start, so the count line is read again below
vetor *v;
v=(vetor*)malloc(a*sizeof(vetor));
char linha2[256];
while (fgets(linha2, sizeof(linha2), csv) != 0)
{
fseek(csv, +1, SEEK_CUR); // skips one character of whatever follows the line just read
for(int i=0;i<a;i++) // restarts at v[0] on every pass of the outer loop
{
fscanf(csv, "%[^\n]", v[i].data); // %[^\n] stops before the newline and does not consume it
}
}
printf("%s\n", v[0].data);
fclose(csv);
return 0;
}
There were a number of mistakes so I went ahead and rewrote the problem areas with comments explaining what I did
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Holds one record line of the CSV as a C string (19 characters at most, plus '\0'). */
typedef struct{
char data[20];
}vetor;
/*
 * Usage: <program> <csv-file>
 *
 * The first line of the file holds the number of records; every following
 * line is stored as text in one vetor. Prints (count + 1), then each stored
 * line. Exits with status 1 when the file is missing or unreadable, or when
 * memory runs out.
 */
int main(int argc,char *argv[]){
    FILE *csv;
    // argv[1] is only valid when an argument was actually given
    if (argc < 2 || (csv=fopen(argv[1], "r")) == NULL )
    {
        printf("not found csv\n");
        exit(1);
    }

    char line[256];
    // Read number of lines
    if (!fgets(line, sizeof(line), csv)) {
        printf("Cannot read line\n");
        fclose(csv);
        exit(1);
    }
    // atoi stops at the newline by itself, so no tokenising is needed
    int num_lines = atoi(line) + 1;
    if (num_lines < 1) {
        num_lines = 1;
    }

    vetor* v = malloc((size_t)num_lines * sizeof *v);
    if (v == NULL) {
        printf("Cannot allocate memory\n");
        fclose(csv);
        exit(1);
    }

    // Fill in vetor, never storing more lines than were allocated
    int stored = 0;
    while (stored < num_lines && fgets(line, sizeof(line), csv) != NULL) {
        line[strcspn(line, "\r\n")] = '\0'; // cut at the line ending, if there is one
        // bounded copy: an over-long line is truncated instead of overflowing data
        snprintf(v[stored].data, sizeof v[stored].data, "%s", line);
        stored++;
    }

    printf("%d\n", num_lines);
    // print only the entries that were actually filled in
    for (int i = 0; i < stored; i++) {
        printf("%s\n", v[i].data);
    }

    free(v);
    fclose(csv);
    return 0;
}
I think the main mistake was a misunderstanding of how best to read in each line of information. If I understood correctly you want each 02/20/18,11403.7 line to be an element in the vetor array.
The easiest way is to simply get each line one at a time with fgets
while (fgets(line, sizeof(line), csv) != NULL)
Change the ending character from newline to the string terminating character '\0'
/* Cut the line at its newline, if it has one; the last line of a file may have none,
   in which case nothing is removed. */
line[strcspn(line, "\n")] = '\0';
Then copy the string into the ith element of vetor and update i for the next iteration of the loop.
strcpy(v[i].data, line);
i++;

Longest and Shortest word in a file C programming

Hello and good evening,
So i'm writing a program in C, that will accept a file.txt as an input and read the text. The program should read the text file, find the longest and shortest word within the file, and print them out when it reaches the end.
I'm REALLY close but i'm getting a seg fault and, not only do I not know why, but i'm at a loss as to how to fix it.
Here's the code:
#include <stdio.h>
#include <string.h>
/* File-scope state shared with main() (question code as posted). */
FILE *fp;
char str[60]; // line buffer
char *largest; // pointer only: no storage is ever assigned to it
char *smallest; // pointer only: no storage is ever assigned to it
char *word; // pointer only: no storage is ever assigned to it
int i, j;
/*
 * Question code as posted. Intent: open the file named by argv[1] and print
 * its longest and shortest word.
 */
int main (int argc, char **argv) {
// check that there are only two arguments
if (argc == 2) {
fp = fopen(argv[1], "r");
}
// if not throw this error
else {
perror("Argument error.");
return (-1);
}
// check if the file exists
if (fp == NULL) {
perror("Error opening file.");
return (-1);
}
// set largest to first string and smallest to second
// NOTE(review): nothing has been read into str yet, and largest/smallest/word
// have no storage behind them, so these strcpy calls write through unset pointers
largest = strcpy(largest, strtok(str, " "));
smallest = strcpy(smallest, strtok(NULL, " "));
word = strcpy(word, strtok(str, " "));
// while we get lines of the file
while (fgets (str, 60, fp) != NULL) {
// while the token string isn't empty
// NOTE(review): word is never reassigned inside this loop, so the condition cannot change
while (word != NULL) {
if (strlen(largest) > strlen(word)) {
strcpy(word, largest); // copies largest INTO word
}
if (strlen(smallest) < strlen(word)) {
strcpy(word, smallest); // copies smallest INTO word
}
}
}
printf("The largest word in the file is: %s", largest);
printf("The smallest word in the file is: %s", smallest);
fclose(fp);
return 0;
}
I'm pretty sure it's the second while loop...I don't want to use it anyway, but i've been hacking at this for so long it's all my logic can think of.
Any help would be appreciated. This IS homework, though only a small part of it, and i'm not asking for helping solving the entire problem.
Also, there is a Makefile involved...I don't think it's important post but feel free to ask me and i'll update.
As I built this I can confirm that the file is able to read and I can print, put, and do all kinds of cool things. It only broke when I tried to implement my logic for longest/shortest word.
Thanks!
There are some problems with your logic. Try the below code
Few assumptions I made are,
Maximum word length is 20 characters. You can change it by MAX_WORD_LENGTH macro.
Words in file are space separated
Max line length is 60 characters
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_WORD_LENGTH 20
/*
 * Usage: <program> <file>
 *
 * Prints the longest and the shortest whitespace-separated word of the file.
 * Words are compared by their full length; the stored copy is limited to
 * MAX_WORD_LENGTH - 1 characters. For a file without any word both results
 * are printed as empty strings. Returns -1 on a usage or open error.
 */
int main (int argc, char **argv)
{
    FILE *fp;
    char str[60];
    /* Fixed arrays, initialised to "", replace the unchecked malloc() calls. */
    char largest[MAX_WORD_LENGTH] = "";
    char smallest[MAX_WORD_LENGTH] = "";
    /* Start from the largest possible value so the first word always wins. */
    size_t smallest_len = (size_t)-1, largest_len = 0;

    if (argc == 2)
    {
        fp = fopen(argv[1], "r");
    }
    else
    {
        printf("Argument error.");
        return (-1);
    }
    if (fp == NULL)
    {
        printf("Error opening file.");
        return (-1);
    }

    while (fgets (str, sizeof str, fp) != NULL)
    {
        /* The newline left by fgets is a separator too, otherwise it would
           be counted as part of the last word of each line. */
        for (char *temp = strtok(str, " \t\r\n"); temp != NULL; temp = strtok(NULL, " \t\r\n"))
        {
            size_t len = strlen(temp);
            if (len > largest_len)
            {
                /* bounded copy: a word longer than the buffer is truncated */
                snprintf(largest, sizeof largest, "%s", temp);
                largest_len = len;
            }
            if (len < smallest_len)
            {
                snprintf(smallest, sizeof smallest, "%s", temp);
                smallest_len = len;
            }
        }
    }
    printf("The largest word in the file is: %s\n", largest);
    printf("The smallest word in the file is: %s\n", smallest);
    fclose(fp);
    return 0;
}
Adopt it for your needs with argv[2]
and enjoy
#include <stdio.h>
#include <string.h>
/*
 * Reads whitespace-separated words from the file "input" and writes the
 * shortest and the longest one to the file "output". Returns 1 when either
 * file cannot be opened. With no words in the input both results are empty.
 */
int main() {
    /* An enum constant keeps the buffers ordinary arrays (a const int would make them VLAs). */
    enum { max_word_length = 60 };
    char longest[max_word_length] = "";
    char shortest[max_word_length] = "";
    char current[max_word_length];
    size_t longest_length = 0;
    size_t shortest_length = max_word_length;
    size_t current_length = 0;

    if (freopen("input", "r", stdin) == NULL)
        return 1;
    if (freopen("output", "w", stdout) == NULL)
        return 1;

    /* %59s = max_word_length - 1: longer words are split instead of overflowing current. */
    while (scanf("%59s", current) == 1) {
        current_length = strlen(current);
        if ( current_length > longest_length) {
            longest_length = current_length;
            strcpy(longest, current);
        }
        if (current_length < shortest_length) {
            shortest_length = current_length;
            strcpy(shortest, current);
        }
    }
    printf("%s %s", shortest, longest);
    return 0;
}
So I just ran this online and it looks like the seg fault is caused by the line
largest = strcpy(largest, strtok(str, " "));
This is because largest is the destination of the string copy in the call
strcpy(largest, strtok(str, " "));
but it's a pointer that points to nothing. It should be declared as an actual array like this:
char largest[60];
Also, you shouldn't be calling strtok on a str when it hasn't been initialized to anything. It's not even a proper string so there's no way strtok is going to do anything useful at that point.

Read from a text file and use each line to compare if they are anagrams

I must modify my program to accept input from
a file called anagrams.txt. This file should have two strings per line, separated by the # character. My program should read
each pair of strings and report back if each pair of strings is an anagram. For example consider the following content of anagrams.txt:
hello#elloh
man#nam
Astro#Oastrrasd
Your program should print out the following:
hello#elloh - Anagrams!
man#nam - Anagrams!
Astro#Oastrrasd- Not anagrams!
I should compile in g++
Here is the code to read from text:
/* Echo anagrams.txt to stdout line by line, or report that the file is missing. */
int main()
{
    /* Guard clause: nothing to print when the file is not there. */
    if (access("anagrams.txt", F_OK) == -1) {
        printf("\nFile not found!\n");
        return 0;
    }

    FILE *ptr_file = fopen("anagrams.txt", "r");
    if (ptr_file == NULL)
        return 1;

    char buf[1000];
    while (fgets(buf, 1000, ptr_file) != NULL) {
        printf("%s", buf);
    }
    fclose(ptr_file);
    printf("\n");
    return 0;
}
Code to find if the text are anagrams:
#include <stdio.h>
int find_anagram(char [], char []);
/* Question code as posted: reads two strings from stdin and reports whether they are anagrams. */
int main()
{
char array1[100], array2[100];
int flag;
printf("Enter the string\n");
gets(array1); // NOTE(review): gets() has no way to limit input to the 100 bytes of array1
printf("Enter another string\n");
gets(array2); // same problem for array2
flag = find_anagram(array1, array2);
if (flag == 1)
printf(" %s and %s are anagrams.\n", array1, array2);
else
printf("%s and %s are not anagrams.\n", array1, array2);
return 0;
}
/* Map 'A'..'Z' to 'a'..'z' and return every other character unchanged, as an array index. */
static unsigned char fold_case(char c)
{
    unsigned char u = (unsigned char)c;
    if (u >= 'A' && u <= 'Z')
        u = (unsigned char)(u - 'A' + 'a');
    return u;
}

/*
 * Decide whether array1 and array2 are anagrams of each other.
 *
 * Letters are compared case-insensitively; every other character must occur
 * equally often in both strings. Returns 1 for anagrams, 0 otherwise.
 * Neither string is modified.
 */
int find_anagram(char array1[], char array2[])
{
    /* One counter per possible byte value, so no character can index out of
       bounds (the original 26-entry table only covered 'a'..'z'). */
    int balance[256] = {0};
    int i;

    for (i = 0; array1[i] != '\0'; i++)
    {
        balance[fold_case(array1[i])]++;
    }
    for (i = 0; array2[i] != '\0'; i++)
    {
        balance[fold_case(array2[i])]--;
    }
    /* Anagrams leave every counter at exactly zero. */
    for (i = 0; i < 256; i++)
    {
        if (balance[i] != 0)
            return 0;
    }
    return 1;
}
You can try something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXLETTER 256
int is_anagram(char *word1, char *word2);
void check_lines(FILE *filename);
int cmpfunc(const void *a, const void *b);
void convert_to_lowercase(char *word);
/* Open anagrams.txt (the file name given in the task) and check every line of it. */
int
main(int argc, char const *argv[]) {
    FILE *filename;

    /* The task statement names the file "anagrams.txt"; the original opened "anagram.txt". */
    if ((filename = fopen("anagrams.txt", "r")) == NULL) {
        fprintf(stderr, "Error opening file\n");
        exit(EXIT_FAILURE);
    }

    check_lines(filename);

    fclose(filename);

    return 0;
}
/*
 * Read "first#second" lines from the stream and print, for each one, whether
 * the two strings are anagrams (ignoring case). Lines that do not contain two
 * '#'-separated strings are skipped.
 */
void
check_lines(FILE *filename) {
    char line[MAXLINE];

    while (fgets(line, MAXLINE, filename) != NULL) {
        char *word1 = strtok(line, "#");
        char *word2 = strtok(NULL, "\r\n");

        /* strtok returns NULL when a half is missing; strdup(NULL) would crash. */
        if (word1 == NULL || word2 == NULL) {
            continue;
        }

        /* is_anagram sorts its arguments in place, so work on copies and
           keep the originals for printing. */
        char *copy1 = strdup(word1);
        char *copy2 = strdup(word2);

        if (copy1 != NULL && copy2 != NULL) {
            convert_to_lowercase(copy1);
            convert_to_lowercase(copy2);

            if (is_anagram(copy1, copy2)) {
                printf("%s#%s - Anagrams!\n", word1, word2);
            } else {
                printf("%s#%s - Not Anagrams!\n", word1, word2);
            }
        }

        /* The copies are only needed for this line. */
        free(copy1);
        free(copy2);
    }
}
/* Convert every character of word to lower case, in place. */
void
convert_to_lowercase(char *word) {
    int i;

    for (i = 0; word[i] != '\0'; i++) {
        /* tolower() requires a value representable as unsigned char; a plain
           char may be negative, hence the cast. */
        word[i] = (char)tolower((unsigned char)word[i]);
    }
}
/*
 * Return 1 when word1 and word2 consist of exactly the same characters, 0 otherwise.
 * Both arguments are sorted in place as a side effect.
 */
int
is_anagram(char *word1, char *word2) {
    /* Once sorted, two anagrams become identical strings. */
    qsort(word1, strlen(word1), sizeof word1[0], cmpfunc);
    qsort(word2, strlen(word2), sizeof word2[0], cmpfunc);

    return strcmp(word1, word2) == 0;
}
/* qsort comparator for single chars: -1, 0 or +1 as *a is below, equal to or above *b. */
int
cmpfunc(const void *a, const void *b) {
    const char left = *(const char *)a;
    const char right = *(const char *)b;

    /* Each comparison yields 0 or 1, so the difference is exactly -1, 0 or +1. */
    return (left > right) - (left < right);
}
Since this looks like a University question, I won't provide a full solution, only a hint.
All you have to do is replace the stdin input part of the anagram-finding file with the code you wrote to read from a file: it's as simple as changing
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
to
// before program:
#define SIZE 1000
// inside main
if (access("anagrams.txt", F_OK) == -1){
    printf("\nFile not found!\n");
    return 1; // Abort the program early if we can't find the file
}
FILE *ptr_file;
char buf[SIZE]; // one buffer size for everything, taken from SIZE
ptr_file = fopen("anagrams.txt","r");
if (!ptr_file)
    return 1;
char array1[SIZE], array2[SIZE];
while (fgets(buf, sizeof buf, ptr_file)!=NULL){
    // do all your anagram stuff here!
    // there is currently one line of the input file stored in buf
    // Hint: You need to split buf into array1 and array2 using '#' to separate it.
}
fclose(ptr_file);
printf("\n");
Additional comments:
Don't ever ever ever use gets. gets doesn't check that the string it writes to can hold the data, which will cause your program to crash if it gets input bigger than the array size. Use fgets(buf, BUF_SIZE, stdin) instead.
Beautiful code is good code. People are more likely to help if they can read your code easily. (fix your brackets)
Just for interest, an alternative algorithm for checking anagrams is to use qsort to sort both arrays and then compare them with a simple string comparison. For strings of length m and n this costs O(m log m + n log n); note that the letter-counting approach in your current code is already O(m + n).
You need to split every line you read by fgets (as you did) in to two strings, and pass them to your find_anagram function. You can do that using strtok:
/*
 * Skeleton: read anagrams.txt line by line, split each line at '#', and pass
 * the two halves to find_anagram(). Returns 1 when the file cannot be opened.
 */
int main()
{
    int flag;
    char buf[1000];
    FILE *ptr_file;

    //Open the file for reading (a failed fopen also covers the "file does not exist" case)
    ptr_file = fopen("anagrams.txt", "r");
    if (ptr_file == NULL)
    {
        printf("\nFile not found!\n");
        return 1;
    }

    while (fgets (buf, sizeof buf, ptr_file) != NULL)
    {
        char *array1 = strtok(buf, "#");
        char *array2 = strtok(NULL, "\n");
        //strtok returns NULL when a half is missing: skip such lines
        if (array1 == NULL || array2 == NULL)
            continue;
        flag = find_anagram (array1, array2);
        //Check flag value to print your message
    }
    fclose(ptr_file);
    return 0;
}
//put your find_anagram function
Don't forget to #include <string.h> to use strtok().

How to return the position of word in the line, C language

I have some problems with finding the position of the word in the text file.
The problem is this: when the user enters a word to search for, the program has to show the line number and the position of the word in that line. My code only shows the line number; how can I add the position of the word in that line?
P.S.I think I have used not efficient way in searching function...
Here is the code:
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
int Search_in_File(char *str);
/* Question code as posted: asks for a word and searches the text file for it. */
int main(int argc, const char * argv[]) {
char word; // NOTE(review): a single char, not an array
printf("Please enter a word to search:\n");
scanf("%s", &word); // %s stores the word plus a terminator, which needs more than one byte
Search_in_File(&word);
}
/*
 * Scan the text file line by line and report every line that contains str.
 * Returns 1 when the file cannot be opened, 0 otherwise.
 */
int Search_in_File(char *str) {
    FILE *fp = fopen("/Users/S/Documents/Learning/C/text.txt","r");
    if (fp == NULL) {
        return(1);
    }

    char temp[1024];
    int find_result = 0;
    /* line_num counts from 1 and advances once per line read. */
    for (int line_num = 1; fgets(temp, 1024, fp) != NULL; line_num++) {
        if (strstr(temp, str) == NULL) {
            continue;
        }
        find_result++;
        printf("A match found on line: %d at position: \n", line_num);
    }

    if (find_result == 0) {
        printf("\nSorry, couldn't find a match.\n");
    }

    fclose(fp);
    return(0);
}
The function strstr() returns a pointer to the occurrence of the word in the line. Pointer arithmetic can be used to determine the position index:
/* strstr() returns a pointer into temp; its distance from the start of temp
   is the 0-based offset of the match within the line. */
char* hit = strstr(temp, str);
if (hit != NULL)
{
    find_result++;
    /* +1 because the leftmost character is reported as column 1 */
    printf( "A match found on line: %d at position: %d\n", line_num, (int)(hit - temp) + 1 );
}
However, while that will find the location of the matched string, it has no sense of what constitutes a "word": it will, for example, find "man" in "mankind".
char word;
You have declared a char variable , but in order to input word you need a char array . Right now word can store only a single character.
Declare a sufficient enough char array -
char word[50];
scanf("%49s", word);
Search_in_File(word); // no need to pass address
EDIT-
And for your problem with getting position , you can make use of function strtok instead of strstr -
int position;
char *token;
while(fgets(temp, 1024, fp) != NULL)
{
    position = 0; //set position to 0 in each iteration
    // fgets keeps the '\n', so the line ending must be a separator too;
    // otherwise the last word of a line ("word\n") never compares equal
    token = strtok(temp, " \t\r\n");
    while (token != NULL) {
        position++; // position is the index of the current word, counting from 1
        if (strcmp(token, str) == 0) { //check for your word
            find_result++;
            printf("A match found on line: %d at position: %d\n", line_num,position);
        }
        token = strtok(NULL, " \t\r\n");
    }
    line_num++;
}

Resources