#include <stdio.h>
#include <string.h>
/*
 * Reads a word to delete, then reads further words from stdin until EOF,
 * appending every word that differs from the delete word to "line", and
 * finally prints the accumulated text.
 */
int main() {
int counter1, counter2; // NOTE(review): never initialised before the ++ below
char line[200] = ""; // accumulates all words that don't need to be deleted
char deleteWord[100]; // word that needs to be deleted
char space; // unused in this function
char word[100];
scanf("%s", deleteWord); // no field width: input is not bounded to the array size
while (1) {
scanf("%s", word); // %s skips leading white space and stops at white space
if (feof(stdin))
break;
// increment counter of total words
++counter1;
if (strcmp(word, deleteWord) == 0) {
// the word read equals the delete word:
// increment counter of deleted words and do not copy it
++counter2;
} else
if (strcmp(word, " ") == 0) { // only true if word is exactly " "; %s never stores white space
strcat(line, word);
strcat(line, " ");
} else
if (strcmp(word, "\n")) { // no "== 0": true whenever word differs from "\n"
strcat(line, word); // strcat does not check that the result fits in line[200]
strcat(line, "\n");
}
}
printf("--NEW TEXT--\n%s", line);
return 0;
}
In summary, my code is supposed to remove a user input string (one or more words) from another user input string (containing or not containing the word(s)) and produce the output. The code removes the word but it adds a newline per word for each iteration. I believe it is doing this because the expression for the second else if is always true. However, when I properly add the strcmp function for the second else if statement, the code does not produce an output at all (no compiler errors - just missing input). Why is this happening and how do I do a strcmp function for a newline?
You read the words with scanf("%s", word), which poses these problems:
all white space is ignored, so you cannot test for spaces nor newlines as you try to do in the loop, and you cannot keep track of line breaks.
you should tell scanf() the maximum number of bytes to store into the destination array word, otherwise any word longer than 99 characters will cause a buffer overflow and invoke undefined behavior.
you should test the return value of scanf() instead of calling feof(), which might be true after the last word has been successfully read. You should simply write the loop as
while (scanf("%99s", word) == 1) {
// increment counter of total words
++counter1;
...
you do not test if the words fit in the line array either, causing a buffer overflow if the words kept amount to more than 199 characters including separators.
To delete a specific word from a stream, you could read one line at a time and delete the matching words from the line:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Prompt for a word, then copy the following input lines to stdout with
 * every whole-word occurrence of that word removed and every run of spaces
 * collapsed to a single space.
 *
 * A match counts as a whole word only when it is bounded on both sides by
 * white space or by the start/end of the line.
 *
 * Returns 1 if the word to remove could not be read, 0 otherwise.
 */
int main() {
    char deleteWord[100]; // word that needs to be deleted
    char line[1000];      // current input line, edited in place

    printf("Enter the word to remove: ");
    if (scanf("%99s", deleteWord) != 1)
        return 1;

    // read and discard the rest of the input line
    int c;
    while ((c = getchar()) != EOF && c != '\n')
        continue;

    size_t len = strlen(deleteWord);
    printf("Enter the text: ");
    while (fgets(line, sizeof line, stdin)) {
        char *p = line;

        while ((p = strstr(p, deleteWord)) != NULL) {
            if ((p == line || isspace((unsigned char)p[-1]))
            &&  (p[len] == '\0' || isspace((unsigned char)p[len]))) {
                /* remove the word; p stays put so the shifted text is rescanned */
                memmove(p, p + len, strlen(p + len) + 1);
            } else {
                p += len;
            }
        }

        /*
         * Squeeze sequences of spaces into a single space.  The scan starts
         * at line + 1, so it must be skipped when the line is now empty
         * (e.g. a final line without '\n' that consisted only of the deleted
         * word); otherwise it would read past the terminator.
         */
        if (line[0] != '\0') {
            char *q = line + 1;

            for (p = line + 1; *p; p++) {
                if (*p != ' ' || p[-1] != ' ')
                    *q++ = *p;
            }
            *q = '\0';
        }
        fputs(line, stdout);
    }
    return 0;
}
Related
how can split the word from its meaning
1. mammoth: large
My code:
/*
 * Opens "dictionary.txt" and repeatedly scans a "word:meaning<TAB>" pair,
 * printing each pair as "word meaning" until fscanf no longer converts
 * both fields.
 * NOTE(review): the closing brace of this function is not part of this excerpt.
 */
void ReadFromFile(){
FILE *dictionary = fopen("dictionary.txt", "r"); // result is not checked for NULL
char word[20];
char meaning[50];
// %[^:] reads up to the colon, %[^\t] reads up to the next tab;
// neither conversion has a field width, so long fields are not bounded
while(fscanf(dictionary, "%[^:]:%[^\t]\t", word, meaning) == 2){
printf("%s %s\n", word, meaning);
}
fclose(dictionary);
Assuming the word and the meaning do not contain digits and dots,
my approach is the following:
First, split the input line on the digits and dots into the tokens which
have the form as word: meaning.
Next separate each token on the colon character.
As a finish up, remove the leading and trailing blank characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INFILE "dictionary.txt"
void split(char *str);
void separate(char *str);
char *trim(char *str);
/*
 * Cut "str" at every digit or dot (the characters that make up the serial
 * numbers) and pass each piece found in between to separate().
 * WARNING: strtok() overwrites the delimiters inside "str".
 */
void
split(char *str)
{
    char serial_chars[] = "0123456789.";    // characters used in the serial number
    char *piece;                            // current "word: meaning" token

    for (piece = strtok(str, serial_chars);
         piece != NULL;
         piece = strtok(NULL, serial_chars)) {
        separate(piece);
    }
}
/*
 * Split one "word: meaning" token at its first colon and print the trimmed
 * word and the trimmed meaning separated by a single space.
 * If the token contains no colon, a diagnostic is written to stderr and the
 * program exits with status 1.
 * WARNING: the array of "str" is modified
 */
void
separate(char *str)
{
    /* strchr() is the standard C spelling of the legacy index() call */
    char *p = strchr(str, ':');

    if (p == NULL) {
        fprintf(stderr, "Illegal format: %s\n", str);
        exit(1);
    }
    *p++ = '\0';                // terminate the "word" string
                                // now "p" points to the start of "meaning"
    printf("%s %s\n", trim(str), trim(p));
}
/* true for the four characters trim() treats as blank */
static int
trim_is_blank(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * remove leading and trailing whitespaces (space, tab, CR, LF)
 * Returns a pointer into "str" at the first non-blank character; the result
 * is an empty string when "str" is empty or all blank.
 * WARNING: the array of "str" is modified
 */
char *
trim(char *str)
{
    char *end = str;

    while (*end != '\0')                    // jump to the end of "str"
        end++;
    // back up over trailing blanks; "end" stays one past the last kept
    // character, so nothing is ever written beyond the original terminator
    while (end > str && trim_is_blank(end[-1]))
        end--;
    *end = '\0';                            // chop the trailing blanks off

    while (trim_is_blank(*str))             // skip leading blanks
        str++;
    return str;
}
/*
 * Open INFILE, feed every line (trimmed) to split(), then close the file.
 * If the file cannot be opened the error is reported with perror() and the
 * program exits with status 1.
 */
int
main()
{
    char buf[BUFSIZ];
    FILE *in = fopen(INFILE, "r");

    if (in == NULL) {
        perror(INFILE);
        exit(1);
    }

    for (;;) {
        if (fgets(buf, BUFSIZ, in) == NULL)
            break;
        split(trim(buf));
    }

    fclose(in);
    return 0;
}
Output:
foe enemy
vast huge
purchase buy
drowsy sleepy
absent missing
prank trick
[snip]
[Alternative]
I suppose C may not be the most suitable language for this kind of string manipulation. High-level languages such as Python, Perl or Ruby can solve it with much less code. Here is an example in Python which will produce the same results:
import re

# One entry is "<digits>. <word>: <meaning>"; group 1 is the word (up to the
# first colon), group 2 is the meaning up to the next whitespace character.
ENTRY = re.compile(r'\d+\.\s*(.+?):\s*(\S+)')

with open("dictionary.txt") as f:
    text = f.read()

for word, meaning in ENTRY.findall(text):
    print(word + " " + meaning)
The input file contains a completely empty line at line 2 and an unnecessary white space after the final full stop of the text. With this input file I am getting 48 words while I was supposed to get 46 words.
My input file contains:
"Opening from A Tale of Two Cities by Charles Darwin
It was the best of times, it was the worst of times. It was the age
of wisdom, it was the age of foolishness. It was the epoch of
belief, it was the epoch of incredulity. "
Here's how I tried:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define max_story_words 1000
#define max_word_length 80
/*
 * Reads "TwoCitiesStory.txt" twice: the first pass counts ' ' and '\n'
 * characters into num_words, the second pass tokenizes each line, lower-cases
 * each token and stores it in story[]. Finally prints story[0..num_words-1]
 * and num_words. Returns EXIT_FAILURE if the file cannot be opened.
 */
int main (int argc, char **argv)
{
char story[max_story_words][max_word_length] = {{0}}; /* zero-initialised word table */
char line[max_story_words] = {0};
char *p;
char ch = 0; /* NOTE(review): fgetc returns int; this char is compared with EOF below */
char *punct="\n ,!.:;?-"; /* token delimiters for strtok */
int num_words = 1; /* starts at 1, then +1 for every ' ' or '\n' read */
int i = 0;
FILE *file_story = fopen ("TwoCitiesStory.txt", "r");
if (file_story==NULL) {
printf("Unable to open story file '%s'\n","TwoCitiesStory.txt");
return (EXIT_FAILURE);
}
/* count words: every space or newline counts, including those on empty
   lines or after the last word */
while ((ch = fgetc (file_story)) != EOF) {
if (ch == ' ' || ch == '\n')
num_words++;
}
rewind (file_story);
i = 0;
/* read each line in file; at most max_word_length - 1 characters per call,
   although line has room for max_story_words bytes */
while (fgets (line, max_word_length, file_story) != NULL)
{
/* tokenize line into words removing punctuation chars in punct */
for (p = strtok (line, punct); p != NULL; p = strtok (NULL, punct))
{
/* convert each char in p to lower-case with tolower */
char *c = p;
for (; *c; c++)
*c = tolower (*c);
/* copy token (word) to story[i]; strncpy with strlen(p) copies no
   terminator, so this relies on story being zero-initialised above */
strncpy ((char *)story[i], p, strlen (p));
i++; /* i ends up as the number of tokens actually stored */
}
}
/* output array: iterates up to num_words, not up to the number of tokens
   stored in the loop above */
for(i = 0; i < num_words; i++)
printf ("story[%d]: %s\n", i, story[i]);
printf("\ntotal words: %d\n\n",num_words);
return (EXIT_SUCCESS);
}
Your num_words takes account of the two extra whitespaces, that's why you get 48.
You should simply print i immediately after the fgets-strtok loop, if I'm not mistaken.
Something along these lines:
/*
 * Sketch only: count one word per run of spaces and one per run of newlines.
 * NOTE(review): as written the two inner while statements have no body or
 * terminating ';' and the outer while is never closed, so this does not
 * compile as shown. The character that ends each inner loop has already been
 * read into ch when the loop stops.
 */
while ((ch = fgetc (file_story)) != EOF) {
if (ch == ' ') {
num_words++;
while( (ch = fgetc (file_story)) == ' ' && (ch != EOF) )
}
if (ch == '\n') {
num_words++;
while( (ch = fgetc (file_story)) == '\n' && (ch != EOF) )
}
Though I wonder why you are only taking whitespace and newline characters into account when counting new words. Two words separated by some other punctuation mark are definitely not accounted for in your code.
My suggestion is to change the words counting loop as follows:
/* count words: a word starts at the first letter seen after a separator */
num_words = 0;
int flag = 0; // 1 while inside a word, 0 after a separator
while ((ch = fgetc (file_story)) != EOF) {
    /* The <ctype.h> classifiers require a value representable as
       unsigned char (or EOF); a plain char holding a byte >= 0x80 may be
       negative, so convert before classifying. */
    unsigned char uc = (unsigned char)ch;

    if (isalpha(uc)) {
        if (!flag) {        // first letter of a word ...
            num_words++;    // ... add to word count
            flag = 1;       // and skip the remaining letters of this word
        }
    } else if (isspace(uc) || ispunct(uc)) {
        flag = 0;           // word separator: the next letter starts a new word
    }
    /* any other character (e.g. a digit) leaves the state unchanged */
}
I have created the two following functions. The first, eatWrd, returns the first word in a string without any white spaces, and removes the first word from the input string:
MAX is a number representing the max length of a string
/*
 * Copies the first space/tab-delimited word of cmd into the local array
 * "word", rewrites cmd so that it holds only the text after that word (up to
 * the first '\n' or '\0'), and returns a pointer to "word".
 * NOTE(review): "word" is an automatic array of this function, so the
 * returned pointer refers to storage that ends when the function returns.
 */
char* eatWrd(char * cmd)
{
int i = 0; //i will hold my place in cmd
int count = 0; //count ends up as the index where the rest of cmd starts
int fw = 0; //fw will hold the index where the first word starts
char rest[MAX]; // rest will hold cmd without the first word
char word[MAX]; // word will hold the first word
// start by skipping initial spaces and tabs
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
count++;
fw++;
}
// now copy the first word until a space, tab, newline or the terminator
while(cmd[i] != ' ' && cmd[i] != '\t' && cmd[i] != '\n' && cmd[i] != '\0'){
word[i-fw] = cmd[i];
i++;
count++;
}
word[i-fw] = '\0';
// now continue past spaces and tabs after the first word
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
count++;
}
// finally save the rest of cmd, stopping at the first newline or terminator
while(cmd[i] != '\n' && cmd[i] != '\0'){
rest[i-count] = cmd[i];
i++;
}
rest[i-count] = '\0';
// reset cmd (MAX bytes are cleared, whatever the size of the caller's buffer),
// and copy rest back into it
memset(cmd, 0, MAX);
strcpy(cmd, rest);
// return word as a char * (pointer to the local array above)
char *ret = word;
return ret;
}
The second, frstWrd, just returns the first word without modifying the input string:
// Copies the first space/tab-delimited word of cmd into the local array
// "word" and returns a pointer to it; cmd itself is not modified.
// NOTE(review): "word" is an automatic array of this function, so the
// returned pointer refers to storage that ends when the function returns.
char* frstWrd(char * cmd)
{
int i = 0; // current index in cmd
int fw = 0; // index where the first word starts
char word[MAX];
// skip leading spaces and tabs
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
fw++;
}
// copy characters until a space, tab, newline or the terminator
while(cmd[i] != ' ' && cmd[i] != '\t' && cmd[i] != '\n' && cmd[i] != '\0'){
word[i-fw] = cmd[i];
i++;
}
word[i-fw] = '\0';
char *ret = word; // pointer to the local array above
return ret;
}
To test the function, I used fgets to read a string from the User(me), and then I printed three strings (frstWrd(input), eatWrd(input), eatWrd(input)). I would have expected that given a string, "my name is tim" for example, the program would print "my my name", but instead it prints the third word three times over, "is is is":
// Test driver: reads one line from stdin and prints the results of
// frstWrd(input), eatWrd(input) and eatWrd(input) in a single printf call.
// NOTE(review): C does not specify the order in which the three argument
// expressions are evaluated, and both functions return pointers to their own
// local arrays, so what gets printed is not predictable from the call order.
main()
{
char input[MAX];
fgets(input, MAX - 1, stdin); // return value is not checked
printf("%s %s %s", frstWrd(input), eatWrd(input), eatWrd(input));
}
I have looked over my functions over and over and cannot see the mistake. I believe there is simply something I don't know about printf, or about using multiple string modification functions as arguments in another function. Any insight would be helpful thanks.
As I see it, rest and word are local variables in the function eatWrd, so it is an error to return a pointer to such memory: the arrays no longer exist once the function has returned.
EDIT 1:
Also you should understand, that in line
printf("%s %s %s", frstWrd(input), eatWrd(input), eatWrd(input));
the function eatWrd(input) could be called first (before frstWrd(input)), because C does not specify the order in which function arguments are evaluated.
EDIT 2:
This can be useful in the function eatWrd:
//char rest[MAX]; // rest will hold cmd without the first word
char * rest = (char*) malloc(MAX);
And new main let be as:
/*
 * Read one line from stdin and print the word returned by frstWrd() followed
 * by the words returned by two successive eatWrd() calls.  Each call sits in
 * its own printf statement so the calls happen in source order.
 * Returns 1 if no line could be read, 0 otherwise.
 */
int main()
{
    char input[MAX];

    /* without this check "input" would be used uninitialised at end of file */
    if (fgets(input, sizeof input, stdin) == NULL)
        return 1;

    printf("%s ", frstWrd(input));
    printf("%s ", eatWrd(input));
    printf("%s\n", eatWrd(input));
    return 0;
}
And in the end my solution for frstWrd (just to show how standard functions can be useful):
/*
 * Return a newly allocated copy of the first white-space-delimited word of
 * cmd; cmd itself is not modified.
 *
 * The result is an empty string when cmd contains no word, and NULL when the
 * allocation fails.  The caller owns the returned buffer and must free() it.
 */
char* frstWrd(char * cmd)
{
    /* a word can never be longer than cmd itself, so "%s" cannot overflow */
    char * word = malloc(strlen(cmd) + 1);

    if (word == NULL)
        return NULL;
    word[0] = '\0';             /* stays empty if sscanf finds no word */
    sscanf(cmd, "%s", word);
    return word;
}
I am currently creating a simple shell for homework and I've run into a problem. Here is a snippet of code with the pieces that pertain to the problem (I may have forgotten some pieces please tell me if you see anything missing):
eatWrd returns the first word from a string, and takes that word out of the string.
wrdCount, as implied, returns the number of words in a string.
if either of these codes are necessary for a response I can post them, just please tell me, I am almost 100% positive they are not the cause of the problem.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX 100
/*
 * Shell loop: prompts with "cmd:", reads a line, splits it into argm[] using
 * wrdCount() and eatWrd(), then handles the built-ins "cd" and "exit".
 * NOTE(review): "i" is not declared anywhere in this snippet.
 */
int main(void)
{
char input[MAX];
char *argm[MAX];
memset(input, 0, sizeof(input));
memset(argm, 0, sizeof(argm));
while(1){
printf("cmd:\n");
fgets(input, MAX-1, stdin); /* return value is not checked */
/* NOTE(review): wrdCount(input) is re-evaluated on every iteration while
   eatWrd() removes a word from input each time, so the bound shrinks as i
   grows -- it looks like not every word ends up in argm; confirm. */
for(i=0;i < wrdCount(input); i++){
argm[i] = eatWrd(input);
}
argm[i] = NULL;
/* strncmp compares only the first 2 (resp. 4) characters, so any command
   that merely starts with "cd" or "exit" also matches; argm[0] is NULL here
   when the loop above stored no word */
if (!strncmp(argm[0],"cd" , 2)){
chdir(argm[1]);
}
if (!strncmp(argm[0],"exit", 4)){
exit(0);
}
/* clear both buffers before the next command */
memset(input, 0, sizeof(input));
memset(argm, 0, sizeof(argm));
}
}
Anyways, this loop works for lots of other commands using execvp, (such as cat, ls, etc.), when I use cd, it works as expected, except when I try to exit the shell, it takes multiple exit calls to actually get out. (as it turns out, the number of exit calls is exactly equal to the number of times I call cd). It only takes one exit call when I don't use cd during a session. I'm not really sure what's going on, any help is appreciated, thanks.
Here is eatWrd:
/*
 * Remove the first word from cmd and return it.
 *
 * Leading spaces and tabs are skipped; the word ends at the next space, tab,
 * newline or at the end of the string.  On return cmd holds only the text
 * that followed the word (with the spaces/tabs after the word skipped, and
 * cut at the first newline).
 *
 * The word is returned in a newly allocated buffer sized to fit it, which
 * the caller must free().  It is an empty string when cmd has no word.
 * Returns NULL, leaving cmd untouched, if the allocation fails.
 */
char* eatWrd(char * cmd)
{
    size_t start = 0;   /* index of the first character of the word */
    size_t end;         /* index just past the word */
    size_t rest;        /* index where the remaining text starts */
    size_t rest_len = 0;
    size_t len;
    char *word;

    while (cmd[start] == ' ' || cmd[start] == '\t')
        start++;

    end = start;
    while (cmd[end] != ' ' && cmd[end] != '\t' && cmd[end] != '\n' && cmd[end] != '\0')
        end++;
    len = end - start;

    /* the word is taken from the same scan that locates the remainder, so
       the two can never disagree about where the word ends */
    word = malloc(len + 1);
    if (word == NULL)
        return NULL;
    memcpy(word, cmd + start, len);
    word[len] = '\0';

    rest = end;
    while (cmd[rest] == ' ' || cmd[rest] == '\t')
        rest++;
    while (cmd[rest + rest_len] != '\n' && cmd[rest + rest_len] != '\0')
        rest_len++;

    /* shift the remainder to the front in place: no scratch buffer and no
       assumption about how large the caller's buffer is */
    memmove(cmd, cmd + rest, rest_len);
    cmd[rest_len] = '\0';

    return word;
}
And here is wrdCount:
/*
 * Count the words in sent.  Words are separated by spaces or tabs, and
 * counting stops at the first newline or at the end of the string.
 */
int wrdCount(char *sent)
{
    const char *cur = sent;
    int total = 0;

    for (;;) {
        /* skip the blanks in front of the next word */
        while (*cur == ' ' || *cur == '\t')
            cur++;

        /* nothing but the end of the line is left */
        if (*cur == '\0' || *cur == '\n')
            return total;

        /* cur is on the first character of a word: count it, then step
           over the rest of the word */
        total++;
        do {
            cur++;
        } while (!(*cur == ' ' || *cur == '\t' || *cur == '\n' || *cur == '\0'));
    }
}
This variation on your code works for me:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#define MAX 100
/*
 * Extract the next white-space-delimited word from *line.
 *
 * The word is terminated in place (the delimiter after it is overwritten
 * with '\0') and *line is advanced so that the next call continues after it.
 * Returns a pointer to the word inside the caller's buffer, or NULL when
 * only white space (or nothing) is left; *line is not changed in that case.
 */
char *eatWrd(char **line) {
    char *next_c = *line;
    char *word_start = NULL;

    /* <ctype.h> classifiers need an unsigned char value, not a plain char */
    while (isspace((unsigned char)*next_c)) next_c += 1;
    if (*next_c) {
        word_start = next_c;
        do {
            next_c += 1;
        } while (*next_c && ! isspace((unsigned char)*next_c));
        if (*next_c) {
            /* a delimiter follows: cut the word there and step over it */
            *next_c = '\0';
            next_c += 1;
        }
        /* when the word ran up to the string terminator, stay on it so the
           next call sees the end of the string instead of reading past it */
        *line = next_c;
    }
    return word_start;
}
/*
 * Minimal shell loop: prompt, read one line, split it into words with
 * eatWrd(), and run the built-ins "cd <dir>" and "exit".
 * The loop ends at end of input (or on a read error) and on "exit".
 */
int main(void)
{
    char input[MAX];
    char *argm[MAX];

    while(1) {
        int word_count = 0;
        char *next_input = input;

        printf("cmd:\n");
        /* stop at EOF or read error instead of re-parsing stale input */
        if (fgets(input, sizeof input, stdin) == NULL)
            break;

        do {
            argm[word_count] = eatWrd(&next_input);
        } while (argm[word_count++]);
        /* The above always overcounts by one */
        word_count -= 1;

        /* blank line: argm[0] is NULL and must not reach strcmp() */
        if (word_count == 0)
            continue;

        if (!strcmp(argm[0], "cd")){
            if (argm[1] == NULL)
                fprintf(stderr, "cd: missing directory\n");
            else if (chdir(argm[1]) != 0)
                perror("cd");
        } else if (!strcmp(argm[0], "exit")) {
            exit(0);
        }
    }
    return 0;
}
Note my variation on eatWrd(), which does not have to move any data around, and which does not require pre-parsing the string to determine how many words to expect. I suppose your implementation would be more complex, so as to handle quoting or some such, but it could absolutely follow the same general approach.
Note, too, my correction to the command-matching conditions, using !strcmp() instead of strncmp().
I've made a simple spellchecker that reads in a dictionary and user text file to check against it. The program needs to display the line and word index of any word not in the dictionary. So it works fine until the user text file has a return \n character in it (at the end of a paragraph or sentence). So Hello is actually tested against the dictionary as Hello\n and the program believes its spelled incorrectly. Can anyone advise a method to remove the \n character? Here is my code:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/*
 * Convert the string str to lower case in place, using tolower() on each
 * character up to the terminating '\0'.
 */
void StrLower(char str[])
{
    size_t i;

    /* tolower() requires a value representable as unsigned char (or EOF);
       a plain char holding a byte >= 0x80 may be negative */
    for (i = 0; str[i] != '\0'; i++)
        str[i] = (char)tolower((unsigned char)str[i]);
}
/*
 * Spell checker: argv[1] names the dictionary file, argv[2] the text file.
 * Every line of the text file is split into tokens; each token is lower-cased
 * and looked up by scanning the dictionary file from the start, and tokens
 * that are not found are reported with their line number and word index.
 * Returns 1 if the dictionary cannot be opened, 0 otherwise.
 */
int main (int argc, const char * argv[]) {
FILE *fpDict, *fpWords;
fpWords = fopen(argv[2], "r"); // NOTE(review): not checked for NULL, unlike fpDict below
if((fpDict = fopen(argv[1], "r")) == NULL) {
printf("No dictionary file\n");
return 1;
}
char dictionaryWord[50]; // current word read from dictionary
char line[100]; // line read from spell check file (fgets stores at most 99 chars)
int isWordfound = 0; // 1 if word found in dictionary
int lineCount = 0; // line in spellcheck file we are currently on
int wordCount = 0; // word on line of spellcheck file we are currently on
while ( fgets ( line, sizeof line, fpWords ) != NULL )
{
lineCount ++;
wordCount = 0;
char *spellCheckWord;
// first token: split on spaces only
spellCheckWord = strtok(line, " ");
while (spellCheckWord != NULL) {
wordCount++;
// NOTE(review): the token obtained above is replaced here before it has
// been looked up, so the first token of each line is never checked.
// Neither delimiter set contains '\n', so the last token of a line keeps
// the newline that fgets stored.
spellCheckWord = strtok(NULL, " ,");
if(spellCheckWord==NULL)
continue;
StrLower(spellCheckWord);
printf("'%s'\n", spellCheckWord);
// linear scan of the dictionary; the fscanf result is not checked and
// "%s" has no field width for dictionaryWord[50]
while(!feof(fpDict))
{
fscanf(fpDict,"%s",dictionaryWord);
int res = strcmp(dictionaryWord, spellCheckWord);
if(res==0)
{
isWordfound = 1;
break;
}
}
if(!isWordfound){
printf("word '%s' not found in Dictionary on line: %d, word index: %d\n", spellCheckWord, lineCount, wordCount); //print word and line not in dictionary
}
rewind(fpDict); //resets dictionary file pointer
isWordfound = 0; //resets wordfound for next iteration
}
}
fclose(fpDict);
fclose(fpWords);
return 0;
}
Wow thanks for the quick responses everyone. You guys are great, over the moon with that!
Remove the '\n' Immediately after the fgets() call:
/* Strip the newline that fgets keeps at the end of each line before the line
   is tokenized. Fragment: the loop body opened here continues with the
   original code. assert() is declared in <assert.h>. */
while ( fgets ( line, sizeof line, fpWords ) != NULL )
{
size_t linelen = strlen(line);
/* guards the line[linelen - 1] access below against an empty string */
assert((linelen > 0) && "this can happen only when file is binary");
if (line[linelen - 1] == '\n') line[--linelen] = 0; /* remove trailing '\n' and update linelen */
Try adding \n to the argument you pass to strtok.
If you simply want to remove the character for the sake of comparison, and know it will be at the end of a line, then when you read the word into your buffer, do a strchr() for \n and then replace that position with \0 if you find it.
How about:
/* Drop a single trailing newline from dictionaryWord, if there is one. */
size_t length = strlen(dictionaryWord);
if (length != 0) {
    char *last = &dictionaryWord[length - 1];

    if (*last == '\n')
        *last = 0;
}