Read from a text file and use each line to compare if they are anagrams - c

I must modify my program to accept input from
a file called anagrams.txt.This file should have two strings per line, separated by the # character. My program should read
each pair of strings and report back if each pair of strings is an anagram. For example consider the following content of anagrams.txt:
hello#elloh
man#nam
Astro#Oastrrasd
Your program should print out the following:
hello#elloh - Anagrams!
man#nam - Anagrams!
Astro#Oastrrasd- Not anagrams!
I should compile in g++
Here is the code to read from text:
int main()
{
char input[30];
if(access( "anagrams.txt", F_OK ) != -1) {
FILE *ptr_file;
char buf[1000];
ptr_file =fopen("anagrams.txt","r"); if (!ptr_file)
return 1;
while (fgets(buf,1000, ptr_file)!=NULL)
printf("%s",buf);
fclose(ptr_file);
printf("\n");
}
else{ //if file does not exist
printf("\nFile not found!\n");
}
return 0;
}
Code to find if the text are anagrams:
#include <stdio.h>
int find_anagram(char [], char []);
int main()
{
char array1[100], array2[100];
int flag;
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
flag = find_anagram(array1, array2);
if (flag == 1)
printf(" %s and %s are anagrams.\n", array1, array2);
else
printf("%s and %s are not anagrams.\n", array1, array2);
return 0;
}
int find_anagram(char array1[], char array2[])
{
int num1[26] = {0}, num2[26] = {0}, i = 0;
while (array1[i] != '\0')
{
num1[array1[i] - 'a']++;
i++;
}
i = 0;
while (array2[i] != '\0')
{
num2[array2[i] -'a']++;
i++;
}
for (i = 0; i < 26; i++)
{
if (num1[i] != num2[i])
return 0;
}
return 1;
}

You can try something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXLETTER 256
int is_anagram(char *word1, char *word2);
void check_lines(FILE *filename);
int cmpfunc(const void *a, const void *b);
void convert_to_lowercase(char *word);
int
main(int argc, char const *argv[]) {
FILE *filename;
if ((filename = fopen("anagram.txt", "r")) == NULL) {
fprintf(stderr, "Error opening file\n");
exit(EXIT_FAILURE);
}
check_lines(filename);
fclose(filename);
return 0;
}
void
check_lines(FILE *filename) {
char line[MAXLINE];
char *word1, *word2, *copy1, *copy2;
while (fgets(line, MAXLINE, filename) != NULL) {
word1 = strtok(line, "#");
word2 = strtok(NULL, "\n");
copy1 = strdup(word1);
copy2 = strdup(word2);
convert_to_lowercase(copy1);
convert_to_lowercase(copy2);
if (is_anagram(copy1, copy2)) {
printf("%s#%s - Anagrams!\n", word1, word2);
} else {
printf("%s#%s - Not Anagrams!\n", word1, word2);
}
}
}
void
convert_to_lowercase(char *word) {
int i;
for (i = 0; word[i] != '\0'; i++) {
word[i] = tolower(word[i]);
}
}
int
is_anagram(char *word1, char *word2) {
qsort(word1, strlen(word1), sizeof(*word1), cmpfunc);
qsort(word2, strlen(word2), sizeof(*word2), cmpfunc);
if (strcmp(word1, word2) == 0) {
return 1;
}
return 0;
}
int
cmpfunc(const void *a, const void *b) {
if ((*(char*)a) < (*(char*)b)) {
return -1;
}
if ((*(char*)a) > (*(char*)b)) {
return +1;
}
return 0;
}

Since this looks like a University question, I won't provide a full solution, only a hint.
All you have to do is replace the stdin input part of the anagram-finding file with the code you wrote to read from a file: it's as simple as changing
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
to
// before program:
#define SIZE 1000
// inside main
if (access("anagrams.txt", F_OK) == -1){
printf("\nFile not found!\n");
return 1; // Abort the program early if we can't find the file
}
FILE *ptr_file;
char buf[1000];
ptr_file = fopen("anagrams.txt","r");
if (!ptr_file)
return 1;
char array1[SIZE], array2[SIZE];
while (fgets(buf, 1000, ptr_file)!=NULL){
// do all your anagram stuff here!
// there is currently one line of the input file stored in buf
// Hint: You need to split buf into array_1 and array_2 using '#' to separate it.
}
fclose(ptr_file);
printf("\n");
Additional comments:
Don't ever ever ever use gets. gets doesn't check that the string it writes to can hold the data, which will cause your program to crash if it gets input bigger than the array size. Use fgets(buf, BUF_SIZE, stdin) instead.
Beautiful code is good code. People are more likely to help if they can read your code easily. (fix your brackets)
Just for interest, a more efficient algorithm for checking anagrams is to use qsort to sort both arrays, then a simple string matcher to compare them. This will have cost O(mnlog(m+n)), as opposed to O(m^2 n^2), awith the current algorithm

You need to split every line you read by fgets (as you did) in to two strings, and pass them to your find_anagram function. You can do that using strtok:
int main()
{
int flag;
char buf[1000];
FILE *ptr_file;
//Check file existence
//Open the file for reading
while (fgets (buf, 1000, ptr_file) != NULL)
{
char *array1 = strtok(buf, "#");
char *array2 = strtok(NULL, "\n");
flag = find_anagram (array1, array2);
//Check flag value to print your message
}
return 0;
}
//put your find_anagram function
Don't forget to #include <string.h> to use strtok().

Related

Find number of occurrences for the substring in a string using C programming

I am trying a program in c to read a text file that contains array of characters or a string and find the number of occurrences of the substring called "GLROX" and say sequence found when it is found. And the "inputGLORX.txt" contains following string inside it.
GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX
But i am getting wierd results. It would be great if some expert in C-programming helps me to solve this and thanks in advance.
#include <stdio.h>
#include <conio.h>
#include <string.h>
#define NUMBER_OF_STRINGS 40
#define MAX_STRING_SIZE 7
void seqFound()
{
printf("Sequence Found\n");
}
int main()
{
FILE *fp;
char buff[1000];
char strptrArr[NUMBER_OF_STRINGS] [MAX_STRING_SIZE];
const char *search = "GLROX";
fp = fopen("D:/CandC++/inputGLORX.txt", "r");
if(fp==NULL)
printf("It is a null pointer");
while(!feof(fp))
{
//fscanf(fp, "%s", buff);
fgets(buff, 1000,fp);
}
int len = strlen(buff);
printf("length is %d\n",len);
int count = 0;
char *store;
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++;
}
printf("count is %d\n",count);
while (count!=0) {
seqFound();
count--;
}
return 0;
}
As said in the comment, their are at least 2 problems in the code: your fgets will only fetch the last line (if it fetch one at all ? In any case, this is not what you want), and you are incrementing the search string instead of the buff string.
Something like this should fix most of your problems, as long as no lines in your file are longer than 999 characters. This will not work properly if you use the \n or NULL characters in your search string.
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
Here is a main for testing. I used argv to provide the input.txt and the search string.
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv)
{
FILE *fp;
char buff[1000];
char *search;
if (argc < 3)
return (-1);
search = argv[2];
if (search[0] == '\0')
return (-1);
if ((fp = fopen(argv[1], "r")) == NULL)
return (-1);
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
printf("Match found: %d\n", count);
return 0;
}
The way you search in buff is wrong, i.e. this code:
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++; // <------- ups
}
When you have a hit, you change search, i.e. the string you are looking for. That's not what you want. The search string (aka the needle) shall be the same all the time. Instead you want to move forward in the buffer buff so that you can search in the remainder of the buffer.
That could be something like:
int main()
{
const char* buff = "GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX";
const char* search = "GLROX";
const char* remBuff = buff; // Pointer to the remainder of buff
// Initialized to be the whole buffer
const char* hit;
int cnt = 0;
while((hit = strstr(remBuff, search))) // Search in the remainder of buff
{
++cnt;
remBuff = hit + 1; // Update the remainder pointer so it points just 1 char
// after the current hit
}
printf("Found substring %d times\n", cnt);
return 0;
}
Output:
Found substring 15 times

C Turning lines of an archive into arrays

I have an archive and I want to turn every line into an array: v[i].data.
However, when I run the code it shows zeros for the arrays.
Is there anything I should change?
Input
1760
02/20/18,11403.7
02/19/18,11225.3
02/18/18,10551.8
02/17/18,11112.7
02/16/18,10233.9
Actual Output
1761
0
Expected Output
1761
02/20/18,11403.7
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
typedef struct{
char data[20];
}vetor;
int main(int argc,char *argv[]){
FILE *csv;
if((csv=fopen(argv[1], "r")) == NULL )
{
printf("not found csv\n");
exit(1);
}
long int a=0;
char linha[256];
char *token = NULL;
if(fgets(linha, sizeof(linha), csv)) //counting lines
{
token = strtok(linha, "\n");
a =(1 + atoi(token));
}
printf("%d\n", a);
rewind(csv);
vetor *v;
v=(vetor*)malloc(a*sizeof(vetor));
char linha2[256];
while (fgets(linha2, sizeof(linha2), csv) != 0)
{
fseek(csv, +1, SEEK_CUR);
for(int i=0;i<a;i++)
{
fscanf(csv, "%[^\n]", v[i].data);
}
}
printf("%s\n", v[0].data);
fclose(csv);
return 0;
}
There were a number of mistakes so I went ahead and rewrote the problem areas with comments explaining what I did
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct{
char data[20];
}vetor;
int main(int argc,char *argv[]){
FILE *csv;
if((csv=fopen(argv[1], "r")) == NULL )
{
printf("not found csv\n");
exit(1);
}
char line[20];
// Read number of lines
int num_lines = 0;
if (!fgets(line, sizeof(line), csv)) {
printf("Cannot read line\n");
exit(1);
}
char* token = strtok(line, "\n");
num_lines = atoi(token) + 1;
vetor* v = malloc(num_lines * sizeof(vetor));
// Fill in vetor
int i = 0;
while (fgets(line, sizeof(line), csv) != NULL) {
int len = strlen(line);
line[len-1] = '\0'; // replace newline with string terminator
strcpy(v[i].data, line); //copy line into v[i].data
i++;
}
printf("%d\n", num_lines);
for (i = 0; i < num_lines; i++) {
printf("%s\n", v[i].data);
}
return 0;
}
I think the main mistake was a misunderstanding of how best to read in each line of information. If I understood correctly you want each 02/20/18,11403.7 line to be an element in the vetor array.
The easiest way is to simply get each line one at a time with fgets
while (fgets(line, sizeof(line), csv) != NULL)
Change the ending character from newline to the string terminating character '\0'
int len = strlen(line);
line[len-1] = '\0';
Then copy the string into the ith element of vetor and update i for the next iteration of the loop.
strcpy(v[i].data, line);
i++;

adding char into an array and returning

Im new to c and am trying to understand pointers.
here I am opening a file and reading the lines given. Im trying to append these lines into an array and return it from the function. I dont seem to be appending or accessing the array correctly. output[count] = status; gives an error with mismatched char and char *.
Im essentially trying to get an array with a list of words given by a file where each element in the array is a word.
char *fileRead(char *command, char output[255]) {
int count = 0;
char input[255];
char *status;
FILE *file = fopen(command, "r");
if (file == NULL) {
printf("Cannot open file\n");
} else {
do {
status = fgets(input, sizeof(input), file);
if (status != NULL) {
printf("%s", status);
strtok(status, "\n");
// add values into output array
output[count] = status;
++count;
}
} while (status);
}
fclose(file);
return output;
}
I access fileRead via:
...
char commandArray[255];
char output[255];
int y = 0;
char *filename = "scriptin.txt";
strcpy(commandArray, fileRead(filename, output));
// read from array and pass into flag function
while (commandArray[y] != NULL) {
n = flagsfunction(flags, commandArray[y], sizeof(buf), flags.position, &desc, &parentrd, right, left, lconn);
y++;
...
Example of Read from file Line by line then storing nonblank lines into an array (array of pointer to char (as char*))
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//for it does not exist because strdup is not a standard function.
char *strdup(const char *str){
char *ret = malloc(strlen(str)+1);
if(ret)
strcpy(ret, str);
return ret;
}
//Read rows up to 255 rows
int fileRead(const char *filename, char *output[255]) {
FILE *file = fopen(filename, "r");
if (file == NULL) {
perror("Cannot open file:");
return 0;
}
int count = 0;
char input[255];
while(count < 255 && fgets(input, sizeof(input), file)) {
char *line = strtok(input, "\n");
if(line)//When it is not a blank line
output[count++] = strdup(line);//Store replica
}
fclose(file);
return count;
}
int main(void){
char *output[255];//(`char *` x 255)
int number_of_read_line = fileRead("data.txt", output);
for(int i = 0; i < number_of_read_line; ++i){
printf("%s\n", output[i]);
free(output[i]);//Discard after being used
}
return 0;
}

Problems with string arrays, strcpy and strings

I'm having real trouble working with strings and string arrays, and using strcpy correctly. I'm using a dictionary of words scanned in a 2D array dictionary. Then I take a start word, alter every letter of it to create many different variants, i.e cat -> cbt, cct, cdt, etc. From there I copy each generated word into a 2D array and to compare these generated words to the dictionary to see if they are real words. I then want to print these real words, i.e cat as a start word will generate bat if its in the dictionary, but zat won't be. When I run the code it prints all the generated words but when It gets to check_dictionary function it prints no words.
The text file it reads from is like:
mat
yes
cat
hat
The code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_WORDS 20000
#define MAX_WORD_LENGTH 30
#define ARGS_REQUIRED 2
typedef struct scanned_words
{
char startword[MAX_WORD_LENGTH];
char endword[MAX_WORD_LENGTH];
} Scanned_words;
Scanned_words scan_two_words(Scanned_words words);
void get_next_word(Scanned_words words,
char parentwords[MAX_WORDS][MAX_WORD_LENGTH]);
void read_file(char * argv[], char dictionary[MAX_WORDS][MAX_WORD_LENGTH]);
void check_dictionary(char dictionary[MAX_WORDS][MAX_WORD_LENGTH],
char parentwords[MAX_WORDS][MAX_WORD_LENGTH]);
void usage(char * argv[]);
int main(int argc, char * argv[])
{
char dictionary[MAX_WORDS][MAX_WORD_LENGTH];
char nextword[MAX_WORDS][MAX_WORD_LENGTH];
char parentwords[MAX_WORDS][MAX_WORD_LENGTH];
Scanned_words words;
if (argc == ARGS_REQUIRED)
{
system("clear");
read_file(&argv[1], dictionary);
words = scan_two_words(words);
get_next_word(words, parentwords);
check_dictionary(dictionary, parentwords);
}
else
{
usage(&argv[0]);
}
return 0;
}
void read_file(char * argv[], char dictionary[MAX_WORDS][MAX_WORD_LENGTH])
//reads the text file and stores the dictonary as a 2D array
{
FILE * file_name;
int word_count = 0, i;
if ((file_name = fopen(argv[0], "r")) == NULL )
{
printf("Cannot open file ... \n");
}
while (fscanf(file_name, "%s", dictionary[i++]) == 1)
{
printf("%s ", dictionary[word_count]);
word_count++;
}
printf("\n");
printf("\n%d words scanned in from: %s\n\n", word_count, argv[0]);
}
Scanned_words scan_two_words(Scanned_words words)
//takes an empty structure, scans both words in and returns them in the same structure
{
printf("Enter the start word: \n");
scanf("%s", words.startword);
printf("\nEnter the end word: \n");
scanf("%s", words.endword);
printf("\n");
return words;
}
void get_next_word(Scanned_words words,
char parentwords[MAX_WORDS][MAX_WORD_LENGTH])
//get all eligible second words from original start word
{
char character;
char currentword[MAX_WORD_LENGTH];
int i;
strcpy(currentword, words.startword);
for (i = 0; currentword[i] != '\0'; i++)
{
strcpy(currentword, words.startword);
for (character = 'a'; character <= 'z'; character++)
{
currentword[i] = character;
strcpy(parentwords[i], currentword);
printf("%s ", parentwords[i]);
}
}
parentwords[i][0] = '\0';
printf("\n\n");
}
void check_dictionary(char dictionary[MAX_WORDS][MAX_WORD_LENGTH],
char parentwords[MAX_WORD_LENGTH][MAX_WORD_LENGTH])
//checks a generated word for eligibility against the dictionary, prints next generation words
{
int i, j;
printf("\nSecond words: \n\n");
for (j = 0; parentwords[j][0] != '\0'; j++)
;
{
for (i = 0; dictionary[i][0] != '\0'; i++)
{
if ((strcmp(dictionary[i], parentwords[j])) == 0)
{
printf("%s \n", parentwords[j]);
}
}
}
}
void usage(char * argv[])
//prints error message
{
printf("Incorrect usage, try: ./program_name %s\n", argv[1]);
}
The formatting revealed this:
for (j = 0; parentwords[j][0] != '\0'; j++)
;
which most probably was meant to be:
for (j = 0; parentwords[j][0] != '\0'; j++)
Here
while (fscanf(file_name, "%s", dictionary[i++]) == 1)
the i is used uninitialised
So change it definition to include an initialisation:
int word_count = 0, i = 0;

merging and sorting a text file in C

I am supoosed write a function that reads two text files line by line, compare them, delete the duplicates and but them into a third file in alphabetical order...I have been working on this for over a month and I am still stuck I have tried several ways to do this and come up with nothing...I was in formed that i have to use strcmp to do this and I cant use any other predefined sorting function...I have also looked around on this site and cannot find much that helps with this...any help would be greatly appreciated..Here is what I have so far:
#include<stdio.h>
#include<string.h>
main (void)
{
char str [200];
char str2 [200];
char new [100];
char temp [100];
int row = 10;
FILE *fa = fopen ("book1.dat", "r");
FILE *fb = fopen ("book2.dat", "r");
FILE *fc = fopen ("fixed.txt", "w");
int i;
int j;
int k;
while (fgets (str, 200, fa) !=NULL && fgets (str2, 200, fb) !=NULL)
{
puts(str);
puts(str2);
if (strcmp( str, str2) ==0 )
{
strcpy (str , new);
} else {
strcpy (new, str);
strcpy (new, str2);
}
}
for ( i = 0; i < row; i++)
{
for (j = i+1; j< row; j++)
{
if(strcmp(new[i], new [j]) > 0)
{
strcpy (temp, new);
strcpy(new, new);
strcpy(new, temp);
}
}
}
for (i = 0; i < length; i ++)
{
fputs(new, fc);
}
}
Your use of strcpy() is peculiar. Recall its signature:
char *strcpy(char *dest, const char *src)
Here's a usage that doesn't make immediate sense to me:
strcpy (new, str); // new now has str
strcpy (new, str2); // new now has str2
You've effectively overwritten something there. I would start from there, and see what else may not be working as you intend. Furthermore, if you can use gcc, look into using gdb as well to debug your code. (You would need to compile with the -g flag.)
First off, can you assume the duplicates from book1 and book2 line up nicely?
Think about how you would detect if the first entry in book1 is identical to the last entry in book2.
Secondly, you have to sort your output alphabetically. Sorting algorithms is kind of one of those common things that students are forced to do all the time. It builds character. For bonus kudos, implement quick sort.
sample a way.
error handling is omitted.
since we are using the sort function of the library sqort, implement your own.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LINE_MAX_SIZE 256
typedef struct filePos {
FILE *fp;
long pos;
} FilePos;
typedef struct myfile {
int lines;
int capacity;
FILE *fp;
FilePos *filePoss;
} MyFile;
MyFile *myfopen(const char *filepath){
char buff[LINE_MAX_SIZE];
MyFile *mfp;
mfp = (MyFile*)malloc(sizeof(MyFile));
mfp->lines = 0;
mfp->capacity=16;
mfp->filePoss=NULL;
mfp->filePoss=(FilePos*)realloc(mfp->filePoss, sizeof(FilePos)*(mfp->capacity *= 2));
mfp->fp = fopen(filepath, "r");
do{
mfp->filePoss[mfp->lines].fp = mfp->fp;
mfp->filePoss[mfp->lines].pos = ftell(mfp->fp);
if(++mfp->lines == mfp->capacity){
mfp->filePoss=(FilePos*)realloc(mfp->filePoss, sizeof(FilePos)*(mfp->capacity *= 2));
}
}while(NULL!=fgets(buff, LINE_MAX_SIZE, mfp->fp));
--mfp->lines;
return mfp;
}
void myfclose(MyFile *mfp){
free(mfp->filePoss);
fclose(mfp->fp);
free(mfp);
}
char *myfgets(FilePos *p, char *buff){
fseek(p->fp, p->pos, SEEK_SET);
return fgets(buff, LINE_MAX_SIZE, p->fp);
}
int myfcomp(const void *a, const void *b){
char buff_a[LINE_MAX_SIZE];
char buff_b[LINE_MAX_SIZE];
FilePos *fpa,*fpb;
fpa=(FilePos*)a;
fpb=(FilePos*)b;
myfgets(fpa, buff_a);
myfgets(fpb, buff_b);
return strcmp(buff_a, buff_b);
}
void myfsort(MyFile *mfp){
qsort(mfp->filePoss, mfp->lines, sizeof(FilePos), myfcomp);
}
void myfprint(MyFile *mfp){
char buff[LINE_MAX_SIZE];
int i;
for(i=0;i<mfp->lines ;++i)
printf("%s", myfgets(mfp->filePoss + i, buff));
}
void merge(const char *inpfile1, const char *inpfile2, const char *outfile){
FILE *fo;
MyFile *fi1, *fi2;
char buff_f1[LINE_MAX_SIZE];
char buff_f2[LINE_MAX_SIZE];
char buff_fo[LINE_MAX_SIZE];
char *outbuff=NULL;
int fi1_line, fi2_line;
int eof1, eof2;
fo=fopen(outfile, "w");
fi1=myfopen(inpfile1);
fi2=myfopen(inpfile2);
myfsort(fi1);
myfsort(fi2);
fi1_line=fi2_line=0;
eof1=eof2=0;
*buff_fo='\0';
while(1){
if(!eof1 && outbuff != buff_f2){
myfgets(&(fi1->filePoss[fi1_line]), buff_f1);
}
if(!eof2 && outbuff != buff_f1){
myfgets(&(fi2->filePoss[fi2_line]), buff_f2);
}
if(!eof1 && !eof2){
if(strcmp(buff_f1, buff_f2) <= 0){
outbuff=buff_f1;
++fi1_line;
} else {
outbuff=buff_f2;
++fi2_line;
}
} else if(!eof1 && eof2){
outbuff=buff_f1;
++fi1_line;
} else if(eof1 && !eof2){
outbuff=buff_f2;
++fi2_line;
} else {
break;
}
if(strcmp(outbuff, buff_fo) != 0){//duplicate check
strcpy(buff_fo, outbuff);
fputs(buff_fo, fo);
}
if(fi1->lines == fi1_line)
eof1 = !0;
if(fi2->lines == fi2_line)
eof2 = !0;
}
myfclose(fi2);
myfclose(fi1);
fclose(fo);
}
int main(){
merge("book1.txt", "book2.txt", "fixed.txt");
return 0;
}

Resources