Longest and Shortest word in a file C programming - c

Hello and good evening,
So i'm writing a program in C, that will accept a file.txt as an input and read the text. The program should read the text file, find the longest and shortest word within the file, and print them out when it reaches the end.
I'm REALLY close but i'm getting a seg fault and, not only do I not know why, but i'm at a loss as to how to fix it.
Here's the code:
#include <stdio.h>
#include <string.h>
FILE *fp;
char str[60];
char *largest;
char *smallest;
char *word;
int i, j;
/*
 * Attempts to print the longest and the shortest word of the file named by
 * argv[1]. Returns -1 when the argument count is not 2 or the file cannot be
 * opened, and 0 otherwise.
 */
int main (int argc, char **argv) {
// argc == 2 means exactly one command-line argument (the file name) was given
if (argc == 2) {
fp = fopen(argv[1], "r");
}
// any other argument count is rejected
// NOTE(review): perror() also prints the message for the current errno, which nothing has set here
else {
perror("Argument error.");
return (-1);
}
// fopen returns NULL when the file could not be opened
if (fp == NULL) {
perror("Error opening file.");
return (-1);
}
// intended: seed largest with the first word and smallest with the second
// NOTE(review): largest, smallest and word are file-scope pointers that are never
// pointed at any storage, so strcpy receives a null destination here; str has also
// not been filled by fgets yet when strtok scans it
largest = strcpy(largest, strtok(str, " "));
smallest = strcpy(smallest, strtok(NULL, " "));
word = strcpy(word, strtok(str, " "));
// read the file in chunks of at most 59 characters per fgets call
while (fgets (str, 60, fp) != NULL) {
// NOTE(review): word is never advanced with strtok inside this loop, so the
// loop condition cannot change once the loop has been entered
while (word != NULL) {
// NOTE(review): both copies overwrite word with the stored extreme instead of
// updating largest/smallest from word
if (strlen(largest) > strlen(word)) {
strcpy(word, largest);
}
if (strlen(smallest) < strlen(word)) {
strcpy(word, smallest);
}
}
}
printf("The largest word in the file is: %s", largest);
printf("The smallest word in the file is: %s", smallest);
fclose(fp);
return 0;
}
I'm pretty sure it's the second while loop...I don't want to use it anyway, but i've been hacking at this for so long it's all my logic can think of.
Any help would be appreciated. This IS homework, though only a small part of it, and i'm not asking for helping solving the entire problem.
Also, there is a Makefile involved...I don't think it's important post but feel free to ask me and i'll update.
As I built this I can confirm that the file is able to read and I can print, put, and do all kinds of cool things. It only broke when I tried to implement my logic for longest/shortest word.
Thanks!

There are some problems with your logic. Try the below code
Few assumptions I made are,
Maximum word length is 20 characters. You can change it by MAX_WORD_LENGTH macro.
Words in file are space separated
Max line length is 60 characters
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_WORD_LENGTH 20
/*
 * Prints the longest and the shortest whitespace-separated word found in the
 * file named by argv[1].
 *
 * Returns -1 when the argument count is wrong or the file cannot be opened,
 * and 0 otherwise. When several words share the extreme length, the first one
 * encountered is reported.
 *
 * Limitation: the file is read in chunks of at most 59 characters, so a line
 * longer than that is split and a word straddling the split is seen as two.
 */
int main (int argc, char **argv)
{
    FILE *fp;
    char str[60];
    /* Sized like the line buffer: any token cut out of str is guaranteed to fit. */
    char largest[sizeof str] = "";
    char smallest[sizeof str] = "";
    size_t largest_len = 0;
    /* Every token is shorter than the line buffer, so this acts as "infinity". */
    size_t smallest_len = sizeof str;
    /* Split on all whitespace so the '\n' kept by fgets is not counted as part of a word. */
    static const char delims[] = " \t\r\n";

    if (argc == 2)
    {
        fp = fopen(argv[1], "r");
    }
    else
    {
        printf("Argument error.");
        return (-1);
    }
    if (fp == NULL)
    {
        printf("Error opening file.");
        return (-1);
    }

    while (fgets(str, sizeof str, fp) != NULL)
    {
        char *temp = strtok(str, delims);
        while (temp != NULL)
        {
            size_t len = strlen(temp);
            if (len > largest_len)
            {
                strcpy(largest, temp);
                largest_len = len;
            }
            if (len < smallest_len)
            {
                strcpy(smallest, temp);
                smallest_len = len;
            }
            temp = strtok(NULL, delims);
        }
    }
    fclose(fp);

    /* largest_len stays 0 only when no token was ever seen. */
    if (largest_len == 0)
    {
        printf("The file contains no words.\n");
        return 0;
    }
    printf("The largest word in the file is: %s\n", largest);
    printf("The smallest word in the file is: %s\n", smallest);
    return 0;
}

Adapt it to your needs with argv[2]
and enjoy
#include <stdio.h>
#include <string.h>
/*
 * Reads whitespace-separated words from the file "input" and writes the
 * shortest and the longest one, separated by a space, to the file "output".
 *
 * Returns 1 if either file cannot be opened, 0 otherwise. With no words in
 * the input, two empty strings are written. A word longer than 59 characters
 * is read in pieces of at most 59 characters.
 */
int main() {
    /* An enum constant is a true compile-time constant, so the arrays below
     * are ordinary arrays rather than variable-length arrays. */
    enum { max_word_length = 60 };
    char longest[max_word_length] = "";
    char shortest[max_word_length] = "";
    char current[max_word_length];
    size_t longest_length = 0;
    size_t shortest_length = max_word_length;
    size_t current_length = 0;

    if (freopen("input", "r", stdin) == NULL) {
        perror("input");
        return 1;
    }
    if (freopen("output", "w", stdout) == NULL) {
        perror("output");
        return 1;
    }

    /* The field width must stay at max_word_length - 1 so scanf can never
     * write past the end of current. */
    while (scanf("%59s", current) == 1) {
        current_length = strlen(current);
        if (current_length > longest_length) {
            longest_length = current_length;
            strcpy(longest, current);
        }
        if (current_length < shortest_length) {
            shortest_length = current_length;
            strcpy(shortest, current);
        }
    }
    printf("%s %s", shortest, longest);
    return 0;
}

So I just ran this online and it looks like the seg fault is caused by the line
largest = strcpy(largest, strtok(str, " "));
This is because largest is the destination of the string copy in the call
strcpy(largest, strtok(str, " "));
but it's a pointer that points to nothing. It should be declared as an actual array like this:
char largest[60];
Also, you shouldn't be calling strtok on a str when it hasn't been initialized to anything. It's not even a proper string so there's no way strtok is going to do anything useful at that point.

Related

Program in C to count the frequency of a given word in a text file

Program in C to count the frequency of a given word in a text file
I wrote this program to count how often a given word occurs in a text file, but it counts characters instead.
Need help to fix it.
#include <stdio.h>
#include <stdlib.h>
/*
 * Prompts for a word and tries to count how often it occurs in "text.txt".
 * Always returns 0.
 */
int main()
{
FILE * fptr;
char ch, * word, * a;
int i=0, p=0;
/* room for a word of at most 24 characters plus the terminator */
word =(char *) malloc(25 * sizeof(char));
fptr = fopen("text.txt", "r");
if (!fptr)
{
printf("File not found. \n");
}
else
{
printf("Word: ");
/* NOTE(review): %s has no field width, so input longer than 24 characters overruns word */
scanf("%s", word);
/* manual strlen: p ends up as the length of word */
while(word[p]!='\0')
{
p++;
}
a=(char *) malloc(p * sizeof(char));
/* NOTE(review): ch is never assigned, yet it is used as an offset into a;
   *(ch+a) is therefore an access at an indeterminate position */
while (*(ch+a) != EOF)
{
*(ch+a) = getc(fptr);
/* compares the single character just read with the first character of word only */
if (*(ch+a) == * word)
{
i++;
}
}
}
if (i==0)
printf("Word not found.\n");
else
{
printf("Word found %d times.\n",i);
}
/* NOTE(review): reached with fptr == NULL as well when the file could not be opened */
fclose(fptr);
return 0;
}
The bug in your code is that getc() reads only one character at a time. So you must NOT write *(ch+a) == *word, because ch holds a character value, not an address.
For example, let ch='x' and a=10; then *(ch+a) == *('x'+10), which dereferences an address you never allocated.
This website implements a countOccurrences function, which takes a file pointer and a pointer to const char and returns the number of occurrences of the word.
strstr() helps by finding the first occurrence of a word: it returns a pointer to the beginning of the located substring.
#define BUFFER_SIZE 100

/*
 * Counts how many times the string `word` occurs in the remaining contents
 * of the stream `fptr`.
 *
 * fptr: stream opened for reading; it is consumed up to end of file.
 * word: string to look for.
 *
 * Returns the number of matches, counting overlapping ones ("aa" occurs twice
 * in "aaa"). Returns 0 when fptr or word is NULL or when word is empty.
 *
 * Matching is done with strstr, so substrings inside longer words count too.
 * The stream is read in chunks of at most BUFFER_SIZE - 1 characters; a match
 * that straddles two chunks of an over-long line is not found.
 */
int countOccurrences(FILE *fptr, const char *word)
{
    char str[BUFFER_SIZE];
    int count = 0;

    /* An empty needle matches at every position, including the terminator;
     * stepping past that would read beyond the string, so reject it here. */
    if (fptr == NULL || word == NULL || word[0] == '\0')
        return 0;

    while (fgets(str, BUFFER_SIZE, fptr) != NULL)
    {
        const char *pos = str;

        while ((pos = strstr(pos, word)) != NULL)
        {
            count++;
            /* Resume one character after the start of this hit. word is
             * non-empty, so pos points at a real character and pos + 1 is
             * still inside the string. */
            pos++;
        }
    }
    return count;
}
So in main function just make i=countOccurrences(fptr, word);
main should be look like
/*
 * Prompts for a word and reports how many times it occurs in "text.txt".
 *
 * Returns 1 when memory cannot be allocated or the file cannot be opened,
 * 0 otherwise.
 */
int main()
{
    FILE *fptr;
    char *word;
    int i = 0;

    word = malloc(25 * sizeof(char));//Do NOT cast
    if (word == NULL)
    {
        printf("Out of memory.\n");
        return 1;
    }

    fptr = fopen("text.txt", "r");
    if (!fptr)
    {
        /* Stop here: there is nothing to search and no stream to close. */
        printf("File not found. \n");
        free(word);
        return 1;
    }

    printf("Word: ");
    /* The width keeps the input within the 25-byte buffer (24 characters plus
     * the terminator); only search when a word was actually read. */
    if (scanf("%24s", word) == 1)
        i = countOccurrences(fptr, word);

    if (i == 0)
        printf("Word not found.\n");
    else
        printf("Word found %d times.\n", i);

    fclose(fptr);
    free(word);
    return 0;
}

Find number of occurrences for the substring in a string using C programming

I am trying a program in c to read a text file that contains array of characters or a string and find the number of occurrences of the substring called "GLROX" and say sequence found when it is found. And the "inputGLORX.txt" contains following string inside it.
GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX
But i am getting wierd results. It would be great if some expert in C-programming helps me to solve this and thanks in advance.
#include <stdio.h>
#include <conio.h>
#include <string.h>
#define NUMBER_OF_STRINGS 40
#define MAX_STRING_SIZE 7
/* Writes the line "Sequence Found" to standard output. */
void seqFound()
{
    fputs("Sequence Found\n", stdout);
}
/*
 * Tries to count occurrences of "GLROX" in the input file and calls
 * seqFound() once per counted hit. Always returns 0.
 */
int main()
{
FILE *fp;
char buff[1000];
/* NOTE(review): strptrArr is declared but never used in this function */
char strptrArr[NUMBER_OF_STRINGS] [MAX_STRING_SIZE];
const char *search = "GLROX";
fp = fopen("D:/CandC++/inputGLORX.txt", "r");
/* NOTE(review): the failure is only reported; execution continues with fp == NULL */
if(fp==NULL)
printf("It is a null pointer");
/* every fgets call overwrites buff, so only the result of the last call is kept */
while(!feof(fp))
{
//fscanf(fp, "%s", buff);
fgets(buff, 1000,fp);
}
int len = strlen(buff);
printf("length is %d\n",len);
int count = 0;
char *store;
/* NOTE(review): each iteration searches buff from its start again and shortens the
   search string (search++) instead of moving forward inside buff */
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++;
}
printf("count is %d\n",count);
/* one seqFound() call per counted hit */
while (count!=0) {
seqFound();
count--;
}
return 0;
}
As said in the comment, there are at least two problems in the code: your fgets loop keeps only the last line it reads (if it reads one at all; in any case, this is not what you want), and you are incrementing the search string instead of the position in buff.
Something like this should fix most of your problems, as long as no line in your file is longer than 999 characters. This will not work properly if your search string contains '\n' or NUL characters.
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
Here is a main for testing. I used argv to provide the input.txt and the search string.
#include <stdio.h>
#include <string.h>
/*
 * Counts occurrences of the string argv[2] in the file argv[1].
 *
 * For every hit the running number and the rest of the line starting at the
 * hit are printed, followed by the total. Overlapping hits are counted.
 *
 * Returns -1 when fewer than two arguments are given, when the search string
 * is empty, or when the file cannot be opened; 0 otherwise.
 *
 * The file is read in chunks of at most 999 characters, so a hit that
 * straddles two chunks of a longer line is missed.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    char buff[1000];
    const char *search;
    int count = 0;

    if (argc < 3)
        return (-1);
    search = argv[2];
    /* An empty needle would match at every position. */
    if (search[0] == '\0')
        return (-1);
    if ((fp = fopen(argv[1], "r")) == NULL)
        return (-1);

    while (fgets(buff, sizeof buff, fp) != NULL)
    {
        char *temp = buff;
        while ((temp = strstr(temp, search)))
        {
            printf("%d. %s\n", count + 1, temp);
            count++;
            /* Continue one character after the start of this hit. */
            temp++;
        }
    }
    fclose(fp);

    printf("Match found: %d\n", count);
    return 0;
}
The way you search in buff is wrong, i.e. this code:
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++; // <------- ups
}
When you have a hit, you change search, i.e. the string you are looking for. That's not what you want. The search string (aka the needle) shall be the same all the time. Instead you want to move forward in the buffer buff so that you can search in the remainder of the buffer.
That could be something like:
/*
 * Counts how often "GLROX" occurs in a fixed sample string and prints the
 * total. Each search resumes one character after the previous hit.
 */
int main()
{
    const char *buff = "GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX";
    const char *search = "GLROX";
    int cnt = 0;

    /* hit walks from match to match; the loop ends when strstr finds no more. */
    for (const char *hit = strstr(buff, search);
         hit != NULL;
         hit = strstr(hit + 1, search))
    {
        ++cnt;
    }

    printf("Found substring %d times\n", cnt);
    return 0;
}
Output:
Found substring 15 times

fprintf exercise working but the txt is not valid after running

I'm having a problem I can't tell what to do. I have this task that asks me to take the words from a file (let's say input.txt) and write them all in another file (let's say output.txt) in the reverse order. The fun part is that when I execute my program (I have a VM of Xubuntu given to me from my University) it's impossible to double click the output.txt file 'cause something's wrong: I'll try to translate it:
"Byte sequence invalid in the conversion's input"
The fun part is that if I do "gedit output.txt" I do see those words all well written how they should be, so I'd say my program "works but corrupts the file". What can be the theoreticals reasons behind this? And if you wanna help me out pointing them, here's a link to my code [EDIT, it seems I can't post the pastebin link. Here's the part that involves the output file]:
`char *t=malloc(sizeof(char)*1025), *tmp;
t=fgets(t,1024,fp);
while(t!=NULL){
tmp=strtok(t, " ");
while(tmp!=NULL){
tmp=strrev(tmp);
fprintf(fo, " %s", tmp);
tmp=strtok(NULL, " ");
}
t=fgets(t,1024,fp);
}`
I hope I didn't break any rule 'cause I'm still new to this site, thanks all!
There does not seem to be a problem with your code itself.
Can you test this and tell me whether the problem still occurs?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Reverses the string `str` in place and returns `str`.
 *
 * A NULL pointer or an empty string is returned unchanged.
 *
 * The reversal works on individual char elements, so text stored in a
 * multi-byte encoding has the bytes of each multi-byte character reversed
 * as well.
 */
char *strrev(char *str)
{
    if (str == NULL || *str == '\0')
        return str;

    /* size_t indices match strlen's return type; the empty case was handled
     * above, so the subtraction cannot wrap around. */
    size_t i = 0;
    size_t j = strlen(str) - 1;

    while (i < j) {
        char tmp = str[i];
        str[i] = str[j];
        str[j] = tmp;
        ++i;
        --j;
    }
    return (str);
}
#define BUFFER_LEN 1024
/*
 * Copies the space-separated words of the file argv[1] to the file argv[2],
 * reversing the characters of every word.
 *
 * The output file is opened in append mode when a third argument equal to
 * "a" is given, and is truncated otherwise.
 *
 * Returns -1 on a wrong argument count or when either file cannot be opened,
 * 0 otherwise.
 *
 * NOTE: lines are split on spaces only, so the '\n' kept by fgets stays
 * attached to the last word of a line and is reversed together with it.
 */
int main(int argc, char* argv[])
{
    char line[BUFFER_LEN + 1];
    FILE *fp = NULL;
    FILE *fo = NULL;
    const char *mode;

    if (argc != 3 && argc != 4) {
        fprintf(stderr, "Devi usare 2 o 3 argomenti grz\n");
        return -1;
    }
    if (!(fp = fopen(argv[1],"r"))) { //argv1=input.txt
        fprintf(stderr, "Devi \n");
        return -1;
    }

    /* strcmp returns 0 on equality, so the comparison must be explicit. */
    mode = (argc == 4 && strcmp(argv[3], "a") == 0) ? "a" : "w";
    if (!(fo = fopen(argv[2], mode))) {
        fprintf(stderr, " 3 argomenti grz\n");
        fclose(fp); /* do not leak the already opened input stream */
        return -1;
    }

    while (fgets(line, sizeof line, fp)) {
        for (char *word = strtok(line, " "); word; word = strtok(NULL, " ")) {
            fprintf(fo, "%s ", strrev(word));
        }
    }
    fclose(fp);
    fclose(fo);
    return 0;
}
Can you compile and run this code and tell me whether "mousepad" still has trouble opening the output file?
/*
 * Minimal reproduction: writes the text "test" to the file named by argv[2].
 * Returns -1 when the argument count is not 3 or 4 or when the file cannot
 * be opened for writing, 0 otherwise.
 */
int main(int argc, char* argv[])
{
    if (argc < 3 || argc > 4) {
        fprintf(stderr, "Devi usare 2 o 3 argomenti grz\n");
        return -1;
    }

    FILE *out = fopen(argv[2], "w");
    if (out == NULL) {
        fprintf(stderr, " 3 argomenti grz\n");
        return -1;
    }

    fputs("test", out);
    fclose(out);
    return 0;
}

Read from a text file and use each line to compare if they are anagrams

I must modify my program to accept input from
a file called anagrams.txt.This file should have two strings per line, separated by the # character. My program should read
each pair of strings and report back if each pair of strings is an anagram. For example consider the following content of anagrams.txt:
hello#elloh
man#nam
Astro#Oastrrasd
Your program should print out the following:
hello#elloh - Anagrams!
man#nam - Anagrams!
Astro#Oastrrasd- Not anagrams!
I should compile in g++
Here is the code to read from text:
/*
 * Prints the contents of "anagrams.txt" to standard output, followed by a
 * newline. Prints "File not found!" when the file does not exist.
 * Returns 1 when the file exists but cannot be opened, 0 otherwise.
 */
int main()
{
/* NOTE(review): input is declared but never used in this function */
char input[30];
/* access(..., F_OK) tests for the existence of the file */
if(access( "anagrams.txt", F_OK ) != -1) {
FILE *ptr_file;
char buf[1000];
ptr_file =fopen("anagrams.txt","r"); if (!ptr_file)
return 1;
/* echo the file in chunks of at most 999 characters */
while (fgets(buf,1000, ptr_file)!=NULL)
printf("%s",buf);
fclose(ptr_file);
printf("\n");
}
else{ //if file does not exist
printf("\nFile not found!\n");
}
return 0;
}
Code to find if the text are anagrams:
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
int find_anagram(char [], char []);
/*
 * Reads two strings from standard input and reports whether find_anagram()
 * considers them anagrams of each other. Always returns 0.
 */
int main()
{
char array1[100], array2[100];
int flag;
printf("Enter the string\n");
/* NOTE(review): gets() cannot limit its input to the 100-byte array */
gets(array1);
printf("Enter another string\n");
gets(array2);
/* find_anagram returns 1 for anagrams and 0 otherwise */
flag = find_anagram(array1, array2);
if (flag == 1)
printf(" %s and %s are anagrams.\n", array1, array2);
else
printf("%s and %s are not anagrams.\n", array1, array2);
return 0;
}
/*
 * Reports whether two strings are anagrams of each other.
 *
 * array1, array2: NUL-terminated strings; neither is modified.
 *
 * Returns 1 when both strings contain exactly the same characters with the
 * same multiplicities, comparing letters case-insensitively, and 0 otherwise
 * (including when either argument is NULL). Every character takes part in
 * the comparison, so spaces and punctuation must match as well.
 */
int find_anagram(char array1[], char array2[])
{
    /* One counter per possible unsigned char value: incremented for array1,
     * decremented for array2. All zero at the end means identical contents. */
    int balance[UCHAR_MAX + 1] = {0};
    size_t i;

    if (array1 == NULL || array2 == NULL)
        return 0;

    /* The unsigned char cast keeps the index non-negative and is what
     * tolower requires of its argument. */
    for (i = 0; array1[i] != '\0'; i++)
        balance[tolower((unsigned char)array1[i])]++;
    for (i = 0; array2[i] != '\0'; i++)
        balance[tolower((unsigned char)array2[i])]--;

    for (i = 0; i <= UCHAR_MAX; i++)
    {
        if (balance[i] != 0)
            return 0;
    }
    return 1;
}
You can try something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXLETTER 256
int is_anagram(char *word1, char *word2);
void check_lines(FILE *filename);
int cmpfunc(const void *a, const void *b);
void convert_to_lowercase(char *word);
/*
 * Opens "anagrams.txt" and lets check_lines() report, for every line,
 * whether the two '#'-separated strings on it are anagrams.
 *
 * The command-line arguments are not used. Exits with EXIT_FAILURE when the
 * file cannot be opened; returns 0 otherwise.
 */
int
main(int argc, char const *argv[]) {
    FILE *filename;

    (void)argc;
    (void)argv;

    /* The input file is named "anagrams.txt" (plural). */
    if ((filename = fopen("anagrams.txt", "r")) == NULL) {
        fprintf(stderr, "Error opening file\n");
        exit(EXIT_FAILURE);
    }
    check_lines(filename);
    fclose(filename);
    return 0;
}
/*
 * Reads `filename` line by line; each line is expected to hold two strings
 * separated by '#'. For every such line the pair is printed together with
 * the verdict "Anagrams!" or "Not anagrams!".
 *
 * The comparison is case-insensitive and is done on lowercase copies, so the
 * pair is printed exactly as it was read. Lines that do not contain two
 * non-empty '#'-separated fields are reported on stderr and skipped.
 */
void
check_lines(FILE *filename) {
    char line[MAXLINE];
    /* Working copies: is_anagram sorts its arguments in place. A field cut
     * out of line is always shorter than MAXLINE, so the copies cannot
     * overflow. */
    char copy1[MAXLINE];
    char copy2[MAXLINE];

    while (fgets(line, MAXLINE, filename) != NULL) {
        char *word1 = strtok(line, "#");
        char *word2 = strtok(NULL, "\r\n");

        /* strtok returns NULL when a field is missing (for example a line
         * without '#'); such a line cannot be compared. */
        if (word1 == NULL || word2 == NULL) {
            fprintf(stderr, "Skipping malformed line\n");
            continue;
        }

        strcpy(copy1, word1);
        strcpy(copy2, word2);
        convert_to_lowercase(copy1);
        convert_to_lowercase(copy2);

        if (is_anagram(copy1, copy2)) {
            printf("%s#%s - Anagrams!\n", word1, word2);
        } else {
            printf("%s#%s - Not anagrams!\n", word1, word2);
        }
    }
}
/*
 * Converts every character of the NUL-terminated string `word` to lowercase,
 * in place. Characters without a lowercase form are left unchanged.
 */
void
convert_to_lowercase(char *word) {
    size_t i;

    for (i = 0; word[i] != '\0'; i++) {
        /* tolower requires a value representable as unsigned char; a plain
         * char may be negative, so convert before the call. */
        word[i] = (char)tolower((unsigned char)word[i]);
    }
}
/*
 * Returns 1 when word1 and word2 consist of the same characters, 0 otherwise.
 * Both strings are sorted in place with cmpfunc and then compared, so the
 * caller's buffers are modified.
 */
int
is_anagram(char *word1, char *word2) {
    const size_t elem_size = sizeof(char);

    qsort(word1, strlen(word1), elem_size, cmpfunc);
    qsort(word2, strlen(word2), elem_size, cmpfunc);

    /* Identical sorted strings mean identical character multisets. */
    return strcmp(word1, word2) == 0 ? 1 : 0;
}
/*
 * qsort comparison callback for char elements.
 * Returns -1, 0 or +1 when the char at `a` is less than, equal to or greater
 * than the char at `b`.
 */
int
cmpfunc(const void *a, const void *b) {
    const char lhs = *(const char *)a;
    const char rhs = *(const char *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or +1. */
    return (lhs > rhs) - (lhs < rhs);
}
Since this looks like a University question, I won't provide a full solution, only a hint.
All you have to do is replace the stdin input part of the anagram-finding file with the code you wrote to read from a file: it's as simple as changing
printf("Enter the string\n");
gets(array1);
printf("Enter another string\n");
gets(array2);
to
// before program:
#define SIZE 1000
// inside main
if (access("anagrams.txt", F_OK) == -1){
printf("\nFile not found!\n");
return 1; // Abort the program early if we can't find the file
}
FILE *ptr_file;
char buf[1000];
ptr_file = fopen("anagrams.txt","r");
if (!ptr_file)
return 1;
char array1[SIZE], array2[SIZE];
while (fgets(buf, 1000, ptr_file)!=NULL){
// do all your anagram stuff here!
// there is currently one line of the input file stored in buf
// Hint: You need to split buf into array_1 and array_2 using '#' to separate it.
}
fclose(ptr_file);
printf("\n");
Additional comments:
Don't ever ever ever use gets. gets doesn't check that the string it writes to can hold the data, which will cause your program to crash if it gets input bigger than the array size. Use fgets(buf, BUF_SIZE, stdin) instead.
Beautiful code is good code. People are more likely to help if they can read your code easily. (fix your brackets)
Just for interest, another common way to check for anagrams is to sort both strings with qsort and then compare them with a simple string comparison. For strings of lengths m and n this costs O(m log m + n log n); the letter-counting approach in your current code is O(m + n).
You need to split every line you read with fgets (as you did) into two strings and pass them to your find_anagram function. You can do that using strtok:
/*
 * Skeleton: splits every line read from ptr_file at '#' and hands the two
 * parts to find_anagram(). The comment lines mark steps the reader still has
 * to fill in.
 */
int main()
{
int flag;
char buf[1000];
FILE *ptr_file;
//Check file existence
//Open the file for reading
/* NOTE(review): ptr_file is only assigned once the two steps above are implemented */
while (fgets (buf, 1000, ptr_file) != NULL)
{
/* first field: everything before '#'; second field: the rest up to the newline */
char *array1 = strtok(buf, "#");
char *array2 = strtok(NULL, "\n");
/* NOTE(review): strtok returns NULL when a field is missing, e.g. a line without '#' */
flag = find_anagram (array1, array2);
//Check flag value to print your message
}
return 0;
}
//put your find_anagram function
Don't forget to #include <string.h> to use strtok().

Seg Fault when working with strings C Program / Popen

I have looked for an answer to my question for almost two days and tried every solution suggested to no avail.
I am trying to access a file through a linux terminal using my C Program.
I want to run popen() to do this.
The command I want to run in popen() is : grep -o %s /usr/share/dict/words
Where %s is a variable word that changes each iteration. I have tried using pointers, arrays, and alternative functions such as asprintf() / snprintf()
Here is the code I have right now:
char *message = (char *)malloc(500);
strcpy(message, "grep -n");
printf("%s", message);
strcat(message, "hello");
printf("%s", message);
strcat(message, " /usr/share/dict/words"); // SEG FAULT OCCURS HERE
printf("%s", message);
I would then pass this to popen.
I have also tried initializing as: char message[500] and this returns the same error in the same spot.
Here is my full code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "caeserheader.h"
/*
 * Reads the whole file argv[1] into memory and writes a shifted copy of it
 * to "<argv[1]>.dec". With a second argument the shift is its negated
 * integer value; the branch without one is still commented out.
 * Returns -1 on a wrong argument count or an unopenable input file, else 0.
 */
int main( int argc, char *argv[]){
char *inputfile;
int n = 0;
int shiftamount = 0;
//Determine amount of arguments
if(argc == 2){
inputfile = argv[1];
}
else if(argc == 3){
inputfile = argv[1];
n = atoi(argv[2]);
// the shift is applied in the opposite direction of the given number
shiftamount = n * (-1) ;
}
else{
printf("Please enter a proper number of arguments.");
return -1;
}
//OPENS INPUT FILE
FILE *input = fopen(inputfile, "r");
if(input == NULL){
printf("\n FILE NOT FOUND.");
perror("fopen");
return -1;
}
//RESERVES MEMORY AND GRABS STRING
// seek to the end to learn the file size, then go back to the start
fseek(input, 0L, SEEK_END);
long Tsize = ftell(input);
rewind(input);
char *inputtext;
// calloc zero-fills, so the extra byte serves as the string terminator
inputtext = calloc( 1, Tsize+1);
//ERROR CHECKING
// NOTE(review): both failure branches below only clean up and print; execution
// then continues with the closed stream / freed buffer
if(!inputtext){
fclose(input), printf("MEMORY FAILED.");
}
if(1!=fread( inputtext, Tsize, 1, input)){
fclose(input), free(inputtext), printf("READ FAIL.");
}
//CREATES DECRYPTED STRING
char newletter;
char *newstring;
int i;
//WITH GIVEN NUMBER OF SHIFTS
if(argc == 3){
newstring = malloc(Tsize + 1);
// shift() is not defined in this file; presumably declared in caeserheader.h - TODO confirm
for(i=0; i<Tsize; i++){
newletter = shift(inputtext[i], shiftamount);
newstring[i] = newletter;
}
// NOTE(review): newstring[Tsize] is never written, so the buffer has no terminator
}
//WITHOUT GIVEN NUMBER OF SHIFTS
if(argc == 2){
char *message = (char *)malloc(500); //SEG FAULT SOMEWHERE HERE?
// strcpy(message, "grep -n");
// printf("%s", message);
//strcat(message, "hello");
// printf("%s", message);
// strcat(message, "/usr/share/dict/words");
//printf("%s", message);
// word = strtok(inputtext," ,.-!?\n");
// int i;
//for(i=0; i<10; i++){
//word = strtok(NULL," ,.-!?\n");
//printf("\n%s", word);
//}
// if(( fp = popen(message, "r")) == NULL){
//perror("No file stream found.");
//return -1;
// }
// else {
// pclose(fp);
// printf("FOUND.");
// }
}
// PUTS DECRYPTED STRING IN NEW FILE
// output name is the input name with ".dec" appended
// NOTE(review): copiedname holds 100 bytes; a longer input file name overruns it
char copiedname[100];
strcpy(copiedname, inputfile);
strcat(copiedname, ".dec");
FILE *newfile = fopen(copiedname, "w");
// NOTE(review): newstring is assigned only in the argc == 3 branch, so with
// argc == 2 an uninitialised pointer is passed to fputs here
fputs(newstring, newfile);
// free(newstring);
fclose(input);
fclose(newfile);
return 0;
}
You have set inputfile to argv[1] and later you have used strcat to append to it. Don't do this. You don't own argv.
The strcat function appends a copy of the source string to the destination string, and then returns a pointer to the destination string. It does not "add two strings and return the result" which is how you seem to be using it.

Resources