The number of characters of comments in a file (C programming) - c

I can't seem to get it right, tried everything, but..
int commentChars() {
char str[256], fileName[256];
FILE *fp;
int i;
do{
long commentCount=0;
fflush(stdin);
printf("%s\nEnter the name of the file in %s/", p, dir);
gets(fileName);
if(!(fp=fopen(fileName, "r"))) {
printf("Error! File not found, try again");
return 0;
}
while(!feof(fp)) {
fgets(str,sizeof str,fp);
for(int i=0;i<=sizeof str;i++) {
if(str[i] == '/' && str[i+1] == '/') {
commentCount += (strlen(str)-2);
}
}
}
fclose(fp);
printf("All the chars, contained in a comment: %ld\n", commentCount);
puts(p);
printf("Do you want to search for another file?<Y/N>: ");
i=checker();
}while(i);}
The result is "All the chars, containted in a comment: 0", even though I have comments.
And my second question was.. Analogically, how can I do the same for comments, containing /* */, seems like an impossible job for me.

I think you best use regular expressions. They seem scary, but they're really not that bad for things like this. You can always try playing some regex golf to practice ;-)
I'd approach it as follows:
Build a regular expression that captures comments
Scan your file for it
Count the characters in the match
Using some regex code and a bit about matching comments in C, I hacked this together which should allow you to count all the bytes that are part of a block style comment /* */ - Including the delimiters. I only tested it on OS X. I suppose you can handle the rest?
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX_ERROR_MSG 0x1000
int compile_regex(regex_t *r, char * regex_text)
{
int status = regcomp (r, regex_text, REG_EXTENDED|REG_NEWLINE|REG_ENHANCED);
if (status != 0) {
char error_message[MAX_ERROR_MSG];
regerror (status, r, error_message, MAX_ERROR_MSG);
printf ("Regex error compiling '%s': %s\n",
regex_text, error_message);
return 1;
}
return 0;
}
int match_regex(regex_t *r, const char * to_match, long long *nbytes)
{
/* Pointer to end of previous match */
const char *p = to_match;
/* Maximum number of matches */
size_t n_matches = 10;
/* Array of matches */
regmatch_t m[n_matches];
while(1) {
int i = 0;
int nomatch = regexec (r, p, n_matches, m, 0);
if(nomatch) {
printf("No more matches.\n");
return nomatch;
}
//Just handle first match (the entire match), don't care
//about groups
int start;
int finish;
start = m[0].rm_so + (p - to_match);
finish = m[0].rm_eo + (p - to_match);
*nbytes += m[0].rm_eo - m[0].rm_so;
printf("match length(bytes) : %lld\n", m[0].rm_eo - m[0].rm_so);
printf("Match: %.*s\n\n", finish - start, to_match + start);
p += m[0].rm_eo;
}
return 0;
}
int main(int argc, char *argv[])
{
regex_t r;
char regex_text[128] = "/\\*(.|[\r\n])*?\\*/";
long long comment_bytes = 0;
char *file_contents;
size_t input_file_size;
FILE *input_file;
if(argc != 2) {
printf("Usage : %s <filename>", argv[0]);
return 0;
}
input_file = fopen(argv[1], "rb");
fseek(input_file, 0, SEEK_END);
input_file_size = ftell(input_file);
rewind(input_file);
file_contents = malloc(input_file_size * (sizeof(char)));
fread(file_contents, sizeof(char), input_file_size, input_file);
compile_regex(&r, regex_text);
match_regex(&r, file_contents, &comment_bytes);
regfree(&r);
printf("Found %lld bytes in comments\n", comment_bytes);
return 0;
}

This basically trivial modification of your code deals with several problems in your code.
You should not use feof() like that — `while (!feof(file)) is always wrong.
You should not read data that is not part of the string just read.
I've also refactored your code so that the function takes a file name, opens, counts and closes it, and reports on what it found.
#include <stdio.h>
#include <string.h>
// Revised interface - process a given file name, reporting
static void commentChars(char const *file)
{
char str[256];
FILE *fp;
long commentCount = 0;
if (!(fp = fopen(file, "r")))
{
fprintf(stderr, "Error! File %s not found\n", file);
return;
}
while (fgets(str, sizeof(str), fp) != 0)
{
int len = strlen(str);
for (int i = 0; i <= len; i++)
{
if (str[i] == '/' && str[i + 1] == '/')
{
commentCount += (strlen(str) - 2);
break;
}
}
}
fclose(fp);
printf("%s: Number of characters contained in comments: %ld\n", file, commentCount);
}
int main(int argc, char **argv)
{
if (argc == 1)
commentChars("/dev/stdin");
else
{
for (int i = 1; i < argc; i++)
commentChars(argv[i]);
}
return 0;
}
When run on the source code (ccc.c), it yields:
ccc.c: Number of characters contained in comments: 58
The comment isn't really complete (oops), but it serves to show what goes on. It counts the newline which fgets() preserves as part of the comment, though the // introducer is not counted.
Dealing with /* comments is harder. You need to spot a slash followed by a star, and then read up to the next star slash character pair. This is probably more easily done using character by character input than line-by-line input; you will, at least, need to be able to interleave character analysis with line input.
When you're ready for it, you can try this torture test on your program. It's what I use to check my comment stripper, SCC (which doesn't handle trigraphs — by conscious decision; if the source contains trigraphs, I have a trigraph remover which I use on the source first).
/*
#(#)File: $RCSfile: scc.test,v $
#(#)Version: $Revision: 1.7 $
#(#)Last changed: $Date: 2013/09/09 14:06:33 $
#(#)Purpose: Test file for program SCC
#(#)Author: J Leffler
*/
/*TABSTOP=4*/
// -- C++ comment
/*
Multiline C-style comment
#ifndef lint
static const char sccs[] = "#(#)$Id: scc.test,v 1.7 2013/09/09 14:06:33 jleffler Exp $";
#endif
*/
/*
Multi-line C-style comment
with embedded /* in line %C% which should generate a warning
if scc is run with the -w option
Two comment starts /* embedded /* in line %C% should generate one warning
*/
/* Comment */ Non-comment /* Comment Again */ Non-Comment Again /*
Comment again on the next line */
// A C++ comment with a C-style comment marker /* in the middle
This is plain text under C++ (C99) commenting - but comment body otherwise
// A C++ comment with a C-style comment end marker */ in the middle
The following C-style comment end marker should generate a warning
if scc is run with the -w option
*/
Two of these */ generate */ one warning
It is possible to have both warnings on a single line.
Eg:
*/ /* /* */ */
SCC has been trained to handle 'q' single quotes in most of
the aberrant forms that can be used. '\0', '\\', '\'', '\\
n' (a valid variant on '\n'), because the backslash followed
by newline is elided by the token scanning code in CPP before
any other processing occurs.
This is a legitimate equivalent to '\n' too: '\
\n', again because the backslash/newline processing occurs early.
The non-portable 'ab', '/*', '*/', '//' forms are handled OK too.
The following quote should generate a warning from SCC; a
compiler would not accept it. '
\n'
" */ /* SCC has been trained to know about strings /* */ */"!
"\"Double quotes embedded in strings, \\\" too\'!"
"And \
newlines in them"
"And escaped double quotes at the end of a string\""
aa '\\
n' OK
aa "\""
aa "\
\n"
This is followed by C++/C99 comment number 1.
// C++/C99 comment with \
continuation character \
on three source lines (this should not be seen with the -C flag)
The C++/C99 comment number 1 has finished.
This is followed by C++/C99 comment number 2.
/\
/\
C++/C99 comment (this should not be seen with the -C flag)
The C++/C99 comment number 2 has finished.
This is followed by regular C comment number 1.
/\
*\
Regular
comment
*\
/
The regular C comment number 1 has finished.
/\
\/ This is not a C++/C99 comment!
This is followed by C++/C99 comment number 3.
/\
\
\
/ But this is a C++/C99 comment!
The C++/C99 comment number 3 has finished.
/\
\* This is not a C or C++ comment!
This is followed by regular C comment number 2.
/\
*/ This is a regular C comment *\
but this is just a routine continuation *\
and that was not the end either - but this is *\
\
/
The regular C comment number 2 has finished.
This is followed by regular C comment number 3.
/\
\
\
\
* C comment */
The regular C comment number 3 has finished.
Note that \u1234 and \U0010FFF0 are legitimate Unicode characters
(officially universal character names) that could appear in an
id\u0065ntifier, a '\u0065' character constant, or in a "char\u0061cter\
string". Since these are mapped long after comments are eliminated,
they cannot affect the interpretation of /* comments */. In particular,
none of \u0002A. \U0000002A, \u002F and \U0000002F ever constitute part
of a comment delimiter ('*' or '/').
More double quoted string stuff:
if (logtable_out)
{
sprintf(logtable_out,
"insert into %s (bld_id, err_operation, err_expected, err_sql_stmt, err_sql_state)"
" values (\"%s\", \"%s\", \"%s\", \"", str_logtable, blade, operation, expected);
/* watch out for embedded double quotes. */
}
/* Non-terminated C-style comment at the end of the file

#include <stdio.h>
size_t counter(FILE *fp){
int ch, chn;
size_t count = 0;
enum { none, in_line_comment, in_range_comment, in_string, in_char_constant } status;
#if 0
in_range_comment : /* this */
in_line_comment : //this
in_string : "this"
in_char_constnt : ' '
#endif
status = none;
while(EOF!=(ch=fgetc(fp))){
switch(status){
case in_line_comment :
if(ch == '\n'){
status = none;
}
++count;
continue;
case in_range_comment :
if(ch == '*'){
chn = fgetc(fp);
if(chn == '/'){
status = none;
continue;
}
ungetc(chn, fp);
}
++count;
continue;
case in_string :
if(ch == '\\'){
chn = fgetc(fp);
if(chn == '"'){
continue;
}
ungetc(chn, fp);
} else {
if(ch == '"')
status = none;
}
continue;
case in_char_constant :
if(ch == '\\'){
chn = fgetc(fp);
if(chn == '\''){
continue;
}
ungetc(chn, fp);
} else {
if(ch == '\'')
status = none;
}
continue;
case none :
switch(ch){
case '/':
if('/' == (chn = fgetc(fp))){
status = in_line_comment;
continue;
} else if('*' == chn){
status = in_range_comment;
continue;
} else
ungetc(chn, fp);
break;
case '"':
status = in_string;
break;
case '\'':
status = in_char_constant;
break;
}
}
}
return count;
}
int main(void){
FILE *fp = stdin;
size_t c = counter(fp);
printf("%lu\n", c);
return 0;
}

Related

Strtok strange behaviour

I'm having some troubles using strtok function.
As an exercise I have to deal with a text file by ruling out white spaces, transforming initials into capital letters and printing no more than 20 characters in a line.
Here is a fragment of my code:
fgets(sentence, SIZE, f1_ptr);
char *tok_ptr = strtok(sentence, " \n"); //tokenazing each line read
tok_ptr[0] = toupper(tok_ptr[0]); //initials to capital letters
int num = 0, i;
while (!feof(f1_ptr)) {
while (tok_ptr != NULL) {
for (i = num; i < strlen(tok_ptr) + num; i++) {
if (i % 20 == 0 && i != 0) //maximum of 20 char per line
fputc('\n', stdout);
fputc(tok_ptr[i - num], stdout);
}
num = i;
tok_ptr = strtok(NULL, " \n");
if (tok_ptr != NULL)
tok_ptr[0] = toupper(tok_ptr[0]);
}
fgets(sentence, SIZE + 1, f1_ptr);
tok_ptr = strtok(sentence, " \n");
if (tok_ptr != NULL)
tok_ptr[0] = toupper(tok_ptr[0]);
}
The text is just a bunch of lines I just show as a reference:
Watch your thoughts ; they become words .
Watch your words ; they become actions .
Watch your actions ; they become habits .
Watch your habits ; they become character .
Watch your character ; it becomes your destiny .
Here is what I obtain in the end:
WatchYourThoughts;Th
eyBecomeWords.WatchY
ourWords;THeyBecomeA
ctions.WatchYourActi
ons;TheyBecomeHabits
.WatchYourHabits;The
yBecomeCharacteR.Wat
chYourCharacter;ItBe
comesYourDEstiny.Lao
-Tze
The final result is mostly correct, but sometimes (for example "they" in they become (and only in that case) or "destiny") words are not correctly tokenized. So for example "they" is split into "t" and "hey" resulting in THey (DEstiny in the other instance) after the manipulations I made.
Is it some bug or am I missing something? Probably my code is not that efficient and some condition may end up being critical...
Thank you for the help, it's not that big of a deal, I just don't understand why such a behaviour is occurring.
You have a large number of errors in your code and you are over-complicating the problem. The most pressing error is Why is while ( !feof (file) ) always wrong? Why? Trace the execution-path within your loop. You attempt to read with fgets(), and then you use sentence without knowing whether EOF was reached calling tok_ptr = strtok(sentence, " \n"); before you ever get around to checking feof(f1_ptr)
What happens when you actually reach EOF? That IS "Why while ( !feof (file) ) is always wrong?" Instead, you always want to control your read-loop with the return of the read function you are using, e.g. while (fgets(sentence, SIZE, f1_ptr) != NULL)
What is it you actually need your code to do?
The larger question is why are you over-complicating the problem with strtok, and arrays (and fgets() for that matter)? Think about what you need to do:
read each character in the file,
if it is whitespace, ignore it, set the in-word flag false,
if a non-whitespace, if 1st char in word, capitalize it, output the char, set the in-word flag true and increment the number of chars output to the current line, and finally
if it is the 20th character output, output a newline and reset the counter zero.
The bare-minimum tools you need from your C-toolbox are fgetc(), isspace() and toupper() from ctype.h, a counter for the number of characters output, and a flag to know if the character is the first non-whitespace character after a whitespace.
Implementing the logic
That makes the problem very simple. Read a character, is it whitespace?, set your in-word flag false, otherwise if your in-word flag is false, capitalize it, output the character, set your in-word flag true, increment your word count. Last thing you need to do is check if your character-count has reached the limit, if so output a '\n' and reset your character-count zero. Repeat until you run out of characters.
You can turn that into a code with something similar to the following:
#include <stdio.h>
#include <ctype.h>
#define CPL 20 /* chars per-line, if you need a constant, #define one (or more) */
int main (int argc, char **argv) {
int c, in = 0, n = 0; /* char, in-word flag, no. of chars output in line */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while ((c = fgetc(fp)) != EOF) { /* read / validate each char in file */
if (isspace(c)) /* char is whitespace? */
in = 0; /* set in-word flag false */
else { /* otherwise, not whitespace */
putchar (in ? c : toupper(c)); /* output char, capitalize 1st in word */
in = 1; /* set in-word flag true */
n++; /* increment character count */
}
if (n == CPL) { /* CPL limit reached? */
putchar ('\n'); /* output newline */
n = 0; /* reset cpl counter */
}
}
putchar ('\n'); /* tidy up with newline */
if (fp != stdin) /* close file if not stdin */
fclose (fp);
}
Example Use/Output
Given your input file stored on my computer in dat/text220.txt, you can produce the output you are looking for with:
$ ./bin/text220 dat/text220.txt
WatchYourThoughts;Th
eyBecomeWords.WatchY
ourWords;TheyBecomeA
ctions.WatchYourActi
ons;TheyBecomeHabits
.WatchYourHabits;The
yBecomeCharacter.Wat
chYourCharacter;ItBe
comesYourDestiny.
(the executable for the code was compiled to bin/text220, I usually keep separate dat, obj, and bin directories for data, object files and executables to keep by source code directory clean)
note: by reading from stdin by default if no filename is provided as the first argument to the program, you can use your program to read input directly, e.g.
$ echo "my dog has fleas - bummer!" | ./bin/text220
MyDogHasFleas-Bummer
!
No fancy string functions required, just a loop, a character, a flag and a counter -- the rest is just arithmetic. It's always worth trying to boils your programming problems down to basic steps and then look around your C-toolbox and find the right tool for each basic step.
Using strtok
Don't get me wrong, there is nothing wrong with using strtok and it makes a fairly simple solution in this case -- the point I was making is that for simple character-oriented string-processing, it's often just a simple to loop over the characters in the line. You don't gain any efficiencies using fgets() with an array and strtok(), the read from the file is already placed into a buffer of BUFSIZ1.
If you did want to use strtok(), you should control you read-loop your with the return from fgets()and then you can tokenize with strtok() also checking its return at each point. A read-loop with fgets() and a tokenization loop with strtok(). Then you handle first-character capitalization and then limiting your output to 20-chars per-line.
You could do something like the following:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define CPL 20 /* chars per-line, if you need a constant, #define one (or more) */
#define MAXC 1024
#define DELIM " \t\r\n"
void putcharCPL (int c, int *n)
{
if (*n == CPL) { /* if n == limit */
putchar ('\n'); /* output '\n' */
*n = 0; /* reset value at mem address 0 */
}
putchar (c); /* output character */
(*n)++; /* increment value at mem address */
}
int main (int argc, char **argv) {
char line[MAXC]; /* buffer to hold each line */
int n = 0; /* no. of chars ouput in line */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (line, MAXC, fp)) /* read each line and tokenize line */
for (char *tok = strtok (line, DELIM); tok; tok = strtok (NULL, DELIM)) {
putcharCPL (toupper(*tok), &n); /* convert 1st char to upper */
for (int i = 1; tok[i]; i++) /* output rest unchanged */
putcharCPL (tok[i], &n);
}
putchar ('\n'); /* tidy up with newline */
if (fp != stdin) /* close file if not stdin */
fclose (fp);
}
(same output)
The putcharCPL() function is just a helper that checks if 20 characters have been output and if so outputs a '\n' and resets the counter. It then outputs the current character and increments the counter by one. A pointer to the counter is passed so it can be updated within the function making the updated value available back in main().
Look things over and let me know if you have further questions.
footnotes:
1. Depending on your version of gcc, the constant in the source setting the read-buffer size may be _IO_BUFSIZ. _IO_BUFSIZ was changed to BUFSIZ here: glibc commit 9964a14579e5eef9 For Linux BUFSIZE is defined as 8192 (512 on Windows).
This is actually a much more interesting OP from a professional point of view than some of the comments may suggest, despite the 'newcomer' aspect of the question, which may sometimes raise fairly deep, underestimated issues.
The fun thing is that on my platform (W10, MSYS2, gcc v.10.2), your code runs fine with correct results:
WatchYourThoughts;Th
eyBecomeWords.WatchY
ourWords;TheyBecomeA
ctions.WatchYourActi
ons;TheyBecomeHabits
.WatchYourHabits;The
yBecomeCharacter.Wat
chYourCharacter;ItBe
comesYourDestiny.
So first, congratulations, newcomer: your coding is not that bad.
This points to how different compilers may or may not protect against limited inappropriate coding or specification misuse, may or may not protect stacks or heaps.
This said, the comment by #Andrew Henle pointing to an illuminating answer about feof is quite relevant.
If you follow it and retrieve your feof test, just moving it down after read checks, not before (as below). Your code should yield better results (note: I will just alter your code minimally, deliberately ignoring lesser issues):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#define SIZE 100 // add some leeway to avoid off-by-one issues
int main()
{
FILE* f1_ptr = fopen("C:\\Users\\Public\\Dev\\test_strtok", "r");
if (! f1_ptr)
{
perror("Open issue");
exit(EXIT_FAILURE);
}
char sentence[SIZE] = {0};
if (NULL == fgets(sentence, SIZE, f1_ptr))
{
perror("fgets issue"); // implementation-dependent
exit(EXIT_FAILURE);
}
errno = 0;
char *tok_ptr = strtok(sentence, " \n"); //tokenizing each line read
if (tok_ptr == NULL || errno)
{
perror("first strtok parse issue");
exit(EXIT_FAILURE);
}
tok_ptr[0] = toupper(tok_ptr[0]); //initials to capital letters
int num = 0;
size_t i = 0;
while (1) {
while (1) {
for (i = num; i < strlen(tok_ptr) + num; i++) {
if (i % 20 == 0 && i != 0) //maximum of 20 char per line
fputc('\n', stdout);
fputc(tok_ptr[i - num], stdout);
}
num = i;
tok_ptr = strtok(NULL, " \n");
if (tok_ptr == NULL) break;
tok_ptr[0] = toupper(tok_ptr[0]);
}
if (NULL == fgets(sentence, SIZE, f1_ptr)) // let's get away whith annoying +1,
// we have enough headroom
{
if (feof(f1_ptr))
{
fprintf(stderr, "\n%s\n", "Found EOF");
break;
}
else
{
perror("Unexpected fgets issue in loop"); // implementation-dependent
exit(EXIT_FAILURE);
}
}
errno = 0;
tok_ptr = strtok(sentence, " \n");
if (tok_ptr == NULL)
{
if (errno)
{
perror("strtok issue in loop");
exit(EXIT_FAILURE);
}
break;
}
tok_ptr[0] = toupper(tok_ptr[0]);
}
return 0;
}
$ ./test
WatchYourThoughts;Th
eyBecomeWords.WatchY
ourWords;TheyBecomeA
ctions.WatchYourActi
ons;TheyBecomeHabits
.WatchYourHabits;The
yBecomeCharacter.Wat
chYourCharacter;ItBe
comesYourDestiny.
Found EOF

Compare 2 files

I have a problem, I need to make a program which will compare two files.
If in first file I have:
Milk
Sugar
Eggs
and in the second file I have
Vanilla
Soda
Sugar
I want to show the the line which appear in both files.
I don't have a lot of experience with c, but I tried something.
But my question is how I will show Sugar as output if they are not on the same line?
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#define MAX 100
void equal (char*lineone,char*linetwo){
if(strcmp(lineone,linetwo)==0){
printf("%s",lineone);
}
}
int main(){
FILE *fp1,*fp2;
fp1=fopen("D:/aici/file1.txt","r");
fp2=fopen("D:/aici/file2.txt","r");
char buff[MAX],buff1[MAX];
int i=0;
while((fgets(buff,MAX,fp1)!=NULL)&&(fgets(buff1,MAX,fp2))!=NULL){
//i++;
equal(buff,buff1);
}
}
What you should do (for performence reasons) is to save all the words in to two buffers and then compare them.
But , you can also do it with a little change in your implementation ,
Just need to seperate the loop to one main loop and one inner loop so you will get the effect that for each word in file 1 it will compare all words in file 2, again , very slow method when comparing to just save all the words first and only then compare each other.
void equal (char*lione,char*linetwo){
if(strcmp(lione,linetwo)==0){
printf("%s",lineone);
}
}
int main(){
FILE *fp1,*fp2;
fp1=fopen("D:/aici/file1.txt","r");
fp2=fopen("D:/aici/file2.txt","r");
char buff[MAX],buff1[MAX];
int i=0;
while(fgets(buff,MAX,fp1)!=NULL) {
while(fgets(buff1,MAX,fp2))!=NULL){
//i++;
equal(buff,buff1);
}
rewind(fp2);
}
}
Continuing from the comment, whether you continue using fgets (recommended), or you recognize that you can also use fscanf and not worry about removing the '\n' from each word, you need to validate each step of your program. While fscanf may appear easier at first, you may want to brush up on man fscanf and determine how you will control the '\n' that will be left, unread, in each of your file streams.
The following is a short example, continuing with fgets, showing how you can test for, and remove, each of the trailing '\n' read and included in your buff by fgets. (as well as reasonable validations for each step). (note: I'm presuming that since your input is a single word, a 256-char buffer is sufficient -- given the longest word in the unabridged dictionary is 28 characters, but you can also validate whether fgets has made a complete read of each line, or if additional characters remain unread)
The following code expects the filenames for each of the files to be given as the first two arguments to the program.
#include <stdio.h>
#include <string.h>
#define MAXC 256
int main (int argc, char **argv) {
if (argc < 3) { /* validate 2 arguments given */
fprintf (stderr, "error: insufficient input.\n"
"usage: %s file1 file2\n", argv[0]);
return 1;
}
char buf1[MAXC] = "", /* declare buf1 */
buf2[MAXC] = ""; /* declare buf2 */
FILE *f1 = fopen (argv[1], "r"), /* open file 1 */
*f2 = fopen (argv[2], "r"); /* open file 2 */
if (!f1) { /* validate file 1 open for reading */
fprintf (stderr, "file open failed '%s'\n", argv[1]);
return 1;
}
if (!f2) { /* validate file 2 open for reading */
fprintf (stderr, "file open failed '%s'\n", argv[2]);
return 1;
}
while (fgets (buf1, MAXC, f1)) { /* read each word in file 1 */
size_t len1 = strlen (buf1); /* get length */
if (len1 && buf1[len1 - 1] == '\n')
buf1[--len1] = 0; /* overwrite '\n' with nul-byte */
while (fgets (buf2, MAXC, f2)) { /* read each in file 2 */
size_t len2 = strlen (buf2);
if (len2 && buf2[len2 - 1] == '\n')
buf2[--len2] = 0; /* overwrite '\n' with nul-byte */
if (len1 != len2) /* if lengths differ, not equal */
continue; /* get next word from file 2 */
if (strcmp (buf1, buf2) == 0) /* compare strings */
printf ("%s\n", buf1); /* print if equal */
}
rewind (f2); /* rewind f2, clear EOF */
}
fclose (f1); /* close f1 */
fclose (f2); /* close f2 */
return 0;
}
(note: the length check if (len1 != len2) is just an efficiency check that prevents calling strcmp unless the words are equal in length. A simple comparison on the lengths (which you already have) is much less expensive than a full function call to strcmp every time. (note, this is a really small savings, that you can remove if you like))
Input Files (intentionally no POSIX-eol)
The datafiles were intentionally created without POSIX end-of-lines to demonstrate it makes no difference to the outcome if you properly handle the newline removal.
$ cat dat/f1cmp.txt
Milk
Sugar
Eggs
$ cat dat/f2cmp.txt
Vanilla
Soda
Sugar
Example Use/Output
$ ./bin/fgets_cmp_words dat/f1cmp.txt dat/f2cmp.txt
Sugar
Look things over and concentrate on the validations. Let me know if you have any further questions.
Showing Where Words Differ
To show where the words differ, you only need to modify the inner loop. You can do a simple comparison by looping over the characters in buf1 and buf2 and stopping when the first difference is located. You can continue for the two cases above (1) where the lengths differ; and (2) where the return of strcmp != 0, or you can just do a single test following a non-zero return from strcmp.
The modifications to the inner-loop above is shown below. I don't know what output format you are looking for, so I have just output the words that differ and shown the character at which the words begin to differ (zero-based indexing):
while (fgets (buf2, MAXC, f2)) { /* read each in file 2 */
size_t len2 = strlen (buf2);
int i = 0;
if (len2 && buf2[len2 - 1] == '\n')
buf2[--len2] = 0; /* overwrite '\n' with nul-byte */
if (len1 != len2) { /* if lengths differ, not equal */
/* locate & output difference */
for (i = 0; buf1[i] == buf2[i]; i++) {}
printf ("%s & %s differ at char %d (%c != %c)\n",
buf1, buf2, i, buf1[i], buf2[i]);
continue; /* get next word from file 2 */
}
if (strcmp (buf1, buf2) == 0) /* compare strings */
printf ("%s\n", buf1); /* print if equal */
else { /* locate & output difference */
for (i = 0; buf1[i] == buf2[i]; i++) {}
printf ("%s & %s differ at char %d (%c != %c)\n",
buf1, buf2, i, buf1[i], buf2[i]);
}
}
Example Use/Output
$ ./bin/fgets_cmp_wrds dat/f1cmp.txt dat/f2cmp.txt
Milk & Vanilla differ at char 0 (M != V)
Milk & Soda differ at char 0 (M != S)
Milk & Sugar differ at char 0 (M != S)
Sugar & Vanilla differ at char 0 (S != V)
Sugar & Soda differ at char 1 (u != o)
Sugar
Eggs & Vanilla differ at char 0 (E != V)
Eggs & Soda differ at char 0 (E != S)
Eggs & Sugar differ at char 0 (E != S)
Look it over and let me know if you have further questions.

Parsing simple name/value pair settings in config file with leading and terminating spaces - C

This is the code I made so far. I apologize if my buffer sizes are an overkill.
The idea is to read the entire configuration file (in this example, it's file.conf), and for now we assume it exists. I'll add error checking later.
Once the file is read into stack space, then the getcfg() function searches the configuration data for the specified name, and if it's found, returns the corresponding value. My function works when the configuration file contains leading spaces before names or values; such spaces are ignored.
Say this is my configuration file:
something=data
apples=oranges
fruit=banana
animals= cats
fried =chicken
My code will work correctly with the first four entries of the config file. for example, if I use "something" as the name, then "data" will be returned.
The last item won't work as of yet because of the trailing spaces after "fried" and before the =. I want to be able to have my function automatically remove those spaces, too, especially in case an option format such as
somethingelse = items
begins to be used. (Note the spaces on both sides of the = sign.)
What can I do to make a less CPU-intensive version of my program that also detects and removes trailing spaces from the name and value when processing the name and values?
Here's my current code:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
int getcfg(char* buf, char *name, char *val) {
int fl = 0, n = 0;
char cfg[1][10000], *p = buf;
memset(cfg, 0, sizeof(cfg));
while (*p) {
if (*p == '\n') {
if (strcmp(cfg[0], name) == 0) {
strcpy(val, cfg[1]);
return 1;
}
memset(cfg, 0, sizeof(cfg));
n = 0;
fl = 0;
} else {
if (*p == '=') {
n = 0;
fl = 1;
} else {
if (n != 0 || *p != ' ') {
cfg[fl][n] = *p;
n++;
}
}
}
p++;
}
return 0;
}
int main() {
char val[10000], buf[100000]; //val=value of config item, buf=buffer for entire config file ( > 100KB config file is nuts)
memset(buf, 0, sizeof(buf));
memset(val, 0, sizeof(val));
int h = open("file.conf", O_RDONLY);
if (read(h, buf, sizeof(buf)) < 1) {
printf("Can't read\n");
}
close(h);
printf("Value stat = %d ", getcfg(buf, "Item", val));
printf("Result = '%s'\n", val);
return 0;
}
Behold is a small (~15 lines) sscanf-based read_params() function which does the job. As a bonus, it understands the comments and complains about erroneous lines (if any):
$ cat config_file.c
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/errno.h>
#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof (a)[0]))
enum { MAX_LEN=128 };
struct param {
char name[MAX_LEN];
char value[MAX_LEN];
};
void strtrim(char *s)
{
char *p = s + strlen(s);
while (--p >= s && isspace(*p))
*p = '\0';
}
int read_params(FILE *in, struct param *p, int max_params)
{
int ln, n=0;
char s[MAX_LEN];
for (ln=1; max_params > 0 && fgets(s, MAX_LEN, in); ln++) {
if (sscanf(s, " %[#\n\r]", p->name)) /* emty line or comment */
continue;
if (sscanf(s, " %[a-z_A-Z0-9] = %[^#\n\r]",
p->name, p->value) < 2) {
fprintf(stderr, "error at line %d: %s\n", ln, s);
return -1;
}
strtrim(p->value);
printf("%d: name='%s' value='%s'\n", ln, p->name, p->value);
p++, max_params--, n++;
}
return n;
}
int main(int argc, char *argv[])
{
FILE *f;
struct param p[32];
f = argc == 1 ? stdin : fopen(argv[1], "r");
if (f == NULL) {
fprintf(stderr, "failed to open `%s': %s\n", argv[1],
strerror(errno));
return 1;
}
if (read_params(f, p, ARRAY_SIZE(p)) < 0)
return 1;
return 0;
}
Let's see how it works (quotes mark the beginning and the end of each line for clarity):
$ cat bb | sed -e "s/^/'/" -e "s/$/'/" | cat -n
1 'msg = Hello World! '
2 'p1=v1'
3 ' p2=v2 # comment'
4 ' '
5 'P_3 =v3'
6 'p4= v4#comment'
7 ' P5 = v5 '
8 ' # comment'
9 'p6 ='
$ ./config_file bb
1: name='msg' value='Hello World!'
2: name='p1' value='v1'
3: name='p2' value='v2'
5: name='P_3' value='v3'
6: name='p4' value='v4'
7: name='P5' value='v5'
error at line 9: p6 =
Note: as an additional bonus, the value can be anything, except #\n\r chars, including spaces, as can be seen above with the 'Hello World!' example. If it's not what needed, add space and tab into the exception list at the second sscanf() for the value (or specify accepted characters there instead) and drop strtrim() function.
I'll provide a straight-forward version, with everything being done in main and no key:value saving - the function only recognizes where they are and print them. I used the input file you gave and added one more line in the end as something = more_data.
This version of the parser does not recognize multiple data itens (itens separated by spaces in the data fields, you'll have to figure it out as an exercise).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main(void)
{
int fd = open("file.conf", O_RDONLY, 0);
int i = 0;
char kv[100];
char c;
while (read(fd,&c,1) == 1) {
/* ignoring spaces and tabs */
if (c == '\t' || c == ' ') continue;
else if (c == '=') {
/* finished reading a key */
kv[i] = 0x0;
printf("key found [%s] ", kv);
i = 0;
continue;
} else if (c == '\n') {
/* finished reading a value */
kv[i] = 0x0;
printf(" with data [%s]\n", kv);
i = 0;
continue;
}
kv[i++] = c;
}
close(fd);
return 0;
}
And the output is:
key found [something] with data [data]
key found [apples] with data [oranges]
key found [fruit] with data [banana]
key found [animals] with data [cats]
key found [fried] with data [chicken]
key found [something] with data [more_data]
Explanation
while (read(fd,&c,1) == 1): reads one character at a time from the file.
if (c == '\t' || c == ' ') continue;: this is responsible for ignoring the white-spaces and tabs wherever they are.
else if (c == '='): If the program finds a = character, it concludes that what it just read was a key and treats it. What's inside that if should be easy to understand.
else if (c == '\n'): Then it uses a new-line character to recognize the end of a value. Again, what's inside the if is not hard to understand.
kv[i++] = c;: This is where we save the char value into the buffer kv.
So, with some minor changes, you can adapt this bit of code to become a parsing function that will suit your needs.
Edit and new code
As pointed out by John Bollinger in the comments, using read inside a while to read one character at a time is very costly. I'll post a second version of the program using the same input method OP was using (reading the whole file at once into a buffer) and then parsing it with another function.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
void parse(char *s)
{
char c, kv[100];
int i;
while ((c = *s++)) {
/* ignoring spaces and tabs */
if (c == '\t' || c == ' ') continue;
else if (c == '=') {
/* finished reading a key */
kv[i] = 0x0;
printf("key found [%s] ", kv);
i = 0;
continue;
} else if (c == '\n') {
/* finished reading a value */
kv[i] = 0x0;
printf(" with data [%s]\n", kv);
i = 0;
continue;
}
kv[i++] = c;
}
}
int main(void)
{
int fd = open("file.conf", O_RDONLY, 0);
char buffer[1000];
/* use the reading method that suits you best */
read(fd, buffer, sizeof buffer);
/* only thing parse() expects is a null-terminated string */
parse(buffer);
close(fd);
return 0;
}
It is very unusual to read a whole config file into memory as a flat image, and especially to keep such an image as the internal representation. One would ordinarily parse the file contents into key/value pairs as you go, and store a representation of those pairs.
Also, your use of read() is incorrect, as you cannot safely assume that it will read all bytes of the file in one call. One normally must call read() in a loop, keeping track of the return value from each call to know both when the end of the file is reached and where in the buffer to put the next bytes read.
If the configuration is supposed to be completely generic, so that you don't know in advance what keywords to expect, then you might organize the configuration data in a hash table or a binary search tree, with the parameter names as the keys. If you do know what parameters to expect (or at least which to allow), then you might have a variable or a struct member for each one.
Naturally, the approach to parameter lookup must be paired correctly with the data structure in which you store the parameters. Any of the approaches I suggested will make looking up multiple configuration parameters far faster. They would also avoid wasting memory, and would adapt to extremely large configurations (or at least could do so).
How best to approach reading the file depends on details of your config file format, such as whether keys and/or values are permitted to contain internal spaces, whether more than one key/value pair may appear on the same line, and whether there is an upper bound on the allowed length of config file lines or of keys and values. Here's an approach that expects one key/value pair per line, supports keys and values that contain internal whitespace (but not newlines), but neither of which is longer than 1023 characters, and where keys are not permitted to contain the '=' character:
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
int main() {
char key[1024];
char value[1024];
FILE *config;
int done;
config = fopen("file.conf", "r");
if (!config) {
perror("while opening file.conf");
return 1;
}
do {
char nl = '\0';
int nfields = fscanf(config, " %1023[^=\n]= %1023[^\n]%c", key, value, &nl);
int i;
done = 1;
if (nfields == EOF) {
if (ferror(config)) {
/* handle read error ... */
perror("while reading file.conf");
} else {
/* trailing empty line(s); ignore ... */
}
break;
} else if (nfields == 3) {
if (nl != '\n') {
/* handle excessive-length value ... */
} else {
done = 0;
}
} else if (nfields == 1) {
/* handle excessive-length key ... */
break;
} else {
assert(nfields == 2);
/* last key/value pair, not followed by a newline */
}
if (key[0] == '=') {
/* handle missing key ... */
break;
}
/* successfully read a key / value pair; truncate trailing whitespace */
for (i = strlen(key); key[--i] == ' '; ) {
/* nothing */
}
key[i + 1] ='\0';
for (i = strlen(value); value[--i] == ' '; ) {
/* nothing */
}
value[i + 1] ='\0';
/* record the key / value pair somewhere (but here we just print it) ... */
printf("key: [%s] value: [%s]\n", key, value);
} while (!done);
fclose(config);
return 0;
}
Important points to note about that include:
No mechanism for storing the key / value pairs is provided. I gave you a few options, and there are others, but you must decide what's best for your own purposes. Rather, the program above addresses the problem of parsing your config data once for all, so that you can avoid parsing it de novo every time you perform a lookup.
The code relies on fscanf() to consume any leading whitespace before the key and value, but in order to accommodate internal whitespace in the key and value, it cannot do the same for trailing whitespace.
Instead, it manually trims trailing whitespace from key and value.
The fscanf() format uses explicit field widths to avoid buffer overruns. It uses the %[ and %c field descriptors to scan data that may be or include whitespace.
Although it may look longish, do note how much of that code is dedicated to error handling.
Divide and conquer.
Getting the data and parsing it are best handled with 2 separate routines.
1) Use fgets() or other code with read() to read a line
int foo(FILE *inf) {
char buffer[1000];
while (fgets(buffer, sizeof buffer, inf)) {
if (Parse_KeyValue(buffer, &key_offset, &value_offset)) {
fprintf(stderr, "Bad Line '%s'\n", buffer);
return 1;
}
printf("'%s'='%s'\n", &buffer[key_offset], &buffer[value_offset]);
}
}
2) Parse the line. (Sample unchecked code)
// 0: Success
// 1: failure
int Parse_KeyValue(char *line, size_t *key_offset, size_t *value_offset) {
char *p = line;
while (isspace((unsigned char) *p)) p++;
*key_offset = p - line;
const char *end = p;
while (*p != '=') {
if (*p == '\0') return 1; // fail, no `=` found
if (!isspace((unsigned char) *p)) {
end = p+1;
}
p++;
}
*end = '\0';
p++; // consume `=`
while (isspace((unsigned char) *p)) p++;
*value_offset = p - line;
end = p;
while (*p) {
if (!isspace((unsigned char) *p)) {
end = p+1;
}
p++;
}
*end = '\0';
return 0;
}
This does allow for valid "" key and value. Adjust as needed.

Detecting and skipping line comments with Flex

How can I detect one line comments like // in Flex and skip those lines?
Also, for /* comments, will the following snippet be enough?
"/*" { comment(); }
%%
comment()
{
char c, c1;
loop:
while ((c = input()) != '*' && c != 0)
putchar(c);
if ((c1 = input()) != '/' && c != 0)
{
unput(c1);
goto loop;
}
if (c != 0)
putchar(c1);
}
Why don't you just use regular expressions to recognize the comments? The whole point of lex/flex is to save you from having to write lexical scanners by hand. The code you present should work (if you put the pattern /* at the beginning of the line), but it's a bit ugly, and it is not obvious that it will work.
Your question says that you want to skip comments, but the code you provide uses putchar() to print the comment, except for the /* at the beginning. Which is it that you want to do? If you want to echo the comments, you can use an ECHO action instead of doing nothing.
Here are the regular expressions:
Single line comment
This one is easy because in lex/flex, . won't match a newline. So the following will match from // to the end of the line, and then do nothing.
"//".* { /* DO NOTHING */ }
Multiline comment
This is a bit trickier, and the fact that * is a regular expression character as well as a key part of the comment marker makes the following regex a bit hard to read. I use [*] as a pattern which recognizes the character *; in flex/lex, you can use "*" instead. Use whichever you find more readable. Essentially, the regular expression matches sequences of characters ending with a (string of) * until it finds one where the next character is a /. In other words, it has the same logic as your C code.
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] { /* DO NOTHING */ }
The above requires the terminating */; an unterminated comment will force the lexer to back up to the beginning of the comment and accept some other token, usually a / division operator. That's likely not what you want, but it's not easy to recover from an unterminated comment since there's no really good way to know where the comment should have ended. Consequently, I recommend adding an error rule:
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] { /* DO NOTHING */ }
[/][*] { fatal_error("Unterminated comment"); }
For // you can read until you find the end of line \n or EOF, in case if the comment was at the end of file, for example:
static void
skip_single_line_comment(void)
{
int c;
/* Read until we find \n or EOF */
while((c = input()) != '\n' && c != EOF)
;
/* Maybe you want to place back EOF? */
if(c == EOF)
unput(c);
}
as for multiple lines comments /* */, you can read until you see * and peek the next character, if it's / this means this is the end of comment, if not just skip it with any other character. You shouldn't expect EOF, means unclosed comment:
static void
skip_multiple_line_comment(void)
{
int c;
for(;;)
{
switch(input())
{
/* We expect ending the comment first before EOF */
case EOF:
fprintf(stderr, "Error unclosed comment, expect */\n");
exit(-1);
goto done;
break;
/* Is it the end of comment? */
case '*':
if((c = input()) == '/')
goto done;
unput(c);
break;
default:
/* skip this character */
break;
}
}
done:
/* exit entry */ ;
}
Complete file:
%{
#include <stdio.h>
static void skip_single_line_comment(void);
static void skip_multiple_line_comment(void);
%}
%option noyywrap
%%
"//" { puts("short comment was skipped ");
skip_single_line_comment();}
"/*" { puts("long comment begins ");
skip_multiple_line_comment();
puts("long comment ends");}
" " { /* empty */ }
[\n|\r\n\t] { /* empty */ }
. { fprintf(stderr, "Tokenizing error: '%c'\n", *yytext);
yyterminate(); }
%%
static void
skip_single_line_comment(void)
{
int c;
/* Read until we find \n or EOF */
while((c = input()) != '\n' && c != EOF)
;
/* Maybe you want to place back EOF? */
if(c == EOF)
unput(c);
}
static void
skip_multiple_line_comment(void)
{
int c;
for(;;)
{
switch(input())
{
/* We expect ending the comment first before EOF */
case EOF:
fprintf(stderr, "Error unclosed comment, expect */\n");
exit(-1);
goto done;
break;
/* Is it the end of comment? */
case '*':
if((c = input()) == '/')
goto done;
unput(c);
break;
default:
/* skip this character */
break;
}
}
done:
/* exit entry */ ;
}
int main(int argc, char **argv)
{
yylex();
return 0;
}
To detect single line comments :
^"//" printf("This is a comment line\n");
This says any line which starts with // will be considered as comment line.
To detect multi line comments :
^"/*"[^*]*|[*]*"*/" printf("This is a Multiline Comment\n");
*
Explanation :
*
^"/*" This says beginning should be /*.
[^*]* includes all characters including \n but excludes *.
[*]* says 0 or more number of stars.
[^*]|[*]* - "or" operator is applied to get any string.
"*/" specifies */ as end.
This will work perfectly in lex.
Below is the complete code of lex file :
%{
#include <stdio.h>
int v=0;
%}
%%
^"//" printf("This is a comment line\n");
^"/*"[^*]*|[*]*"*/" printf("This is a Multiline Comment\n");
.|\n {}
%%
int yywrap()
{
return 1;
}
main()
{
yylex();
}

Echo All Palindromes, in C

I love the ideas presented in Brian Kernighan and Rob Pike's book, "The UNIX Programming Environment," where they focus on the point of working within an environment where you can put together many (small, precise, well understood) programs on the command line to accomplish many programming tasks.
I'm brushing up on strict ANSI C conventions and trying to stick to this philosophy. Somewhere in this book (I can get an exact page number if needed) they suggest that all programs in this environment should adhere to the following principles:
If input is presented on the command line, as an argument to the program itself, process that input.
If no input is presented on the command line, process input from stdin.
Here's a C program I wrote that will echo any input (numeric or alphabetic) that is a palindrome. My question specifically:
Is this a well behaved C program? In other words, is this what Kernighan and Pike were suggesting is the optimal behavior for a command line application like this?
#include <stdio.h>
#include <string.h> /* for strlen */
int main(int argc, char* argv[]) {
char r_string[100];
if (argc > 1) {
int length = (int)strlen(argv[1]);
int i = 0;
int j = length;
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = argv[1][i];
}
if (strcmp(argv[1], r_string) == 0) {
printf("%s\n", argv[1]);
}
} else {
char* i_string;
while (scanf("%s", i_string) != EOF) {
int length = (int)strlen(i_string);
int i = 0;
int j = length;
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = i_string[i];
}
if (strcmp(i_string, r_string) == 0) {
printf("%s\n", i_string);
}
}
}
return 0;
}
Yes, I think that you are following the R&K advice. As Hugo said, you could take the argumentas a filename, bu,t IMHO, for this simple program, I'd say that taking the parameter as the palindrome itself may make more sense.
Also, if you allow me extra advice, I would separate the functionality of reading a string from checking whether it is a palindrome or not, because you have that code duplicated right now.
int ispalindrome(const char* c) {
size_t len = strlen(c);
size_t limit = len/2;
size_t i;
for (i = 0; i < limit; i++) {
if(c[i]!=c[len-i-1]) break; /* Different character found */
}
return i==limit; /* If we reached limit, it's a palyndrome */
}
Of course, I am pretty sure this can be improved (it may even have a bug, I am typping quite fast), but once that you have your string, be either from command line or user input, you can call this function or a functiom like this.
NOTE: Edited to reflect comment from Mark, thanks a lot, Mark!
One problem that you have is a potential buffer overflow because you are writing an input of arbitrary length into a buffer with a fixed size. You can fix this by rejecting too long inputs or creating an array of the correct size dynamically. I would avoid using scanf.
Regarding the actual algorithm, you don't need to copy the string reversed and then compare the two strings. You could do the check using only a single copy of the string and a pointer at both ends, both moving in towards the middle.
Here is some code to show the principle:
char* a = /* pointer to first character in string */;
char* b = /* pointer to last character in string (excluding the null terminator) */;
while (a < b && *a == *b)
{
a++;
b--;
}
if (a >= b)
{
// Is palindrome.
}
I agree with Javier that you factor the palindrome checking code out into a separate function.
Regarding the principles you specified, I believe that these tools usually take their arguments as filenames whose content is to be processed. Instead, you are treating them like the input itself.
Take sort, for example. If you don't specify any arguments, the contents from stdin will be sorted. Otherwise, the contents in the file whose filename you specified will be sorted. It is not the arguments themselves that are processed.
The code for this would be something along these lines:
FILE * input = stdin;
if (argc > 1)
{
input = fopen(argv[1], "r");
// handle possible errors from the fopen
}
while (fscanf(input, "%s", i_string) != EOF)
// check if i_string is a palindrome and output to stdout
Also, you should be careful with the buffer overflow specified by Mark Byers.
You're not handling the string reading correctly. The i_string buffer is not initialized, and even if it were, you're should limit the number of bytes that scanf reads to avoid the mentioned overflow:
char i_string[1000];
while (scanf("999%s", i_string) != EOF)
if (is_palindrome(i_string)) /* Use any function defined in the other answers */
printf("%s\n", i_string);
You must always reserve one more byte (1000 vs 999) to account for the NULL string terminator. If you want to allow arbitrary length strings, I think you'll have to dinamically allocate the buffer, and resize it in case bigger strings are present. This would be slightly more complicated.
It is useful for text filters such as a program that prints only lines with palindromes to specify input files via command line arguments e.g., it allows:
$ palindromes input*.txt # file patterns
$ find -name '*.txt' -print0 | xargs -0 palindromes
It is common convention that is supported by many languages. Below are scripts in Perl, Python, C that has the same usage:
Usage: palindromes [FILE]
Print lines that are polindromes in each FILE.
With no FILE, or when FILE is -, read standard input.
in Perl
#!/usr/bin/perl -w
while (<>) { # read stdin or file(s) specified at command line
$line = $_;
s/^\s+//; # remove leading space
s/\s+$//; # remove trailing space
print $line if $_ eq reverse $_; # print line with a palindrome
}
in Python
#!/usr/bin/env python
import fileinput, sys
for line in fileinput.input(): # read stdin or file(s) specified at command line
s = line.strip() # strip whitespace characters
if s == s[::-1]: # is palindrome
sys.stdout.write(line)
in C
#!/usr/local/bin/tcc -run -Wall
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
enum {
MATCH,
NO_MATCH,
ERROR
};
bool is_palindrome(char *first, char *last) {
/** Whether a line defined by range [first, last) is a palindrome.
`last` points either to '\0' or after the last byte if there is no '\0'.
Leading and trailing spaces are ignored.
All characters including '\0' are allowed
*/
--last; // '\0'
for ( ; first < last && isspace(*first); ++first); // skip leading space
for ( ; first < last && isspace(*last); --last); // skip trailing space
for ( ; first < last; ++first, --last)
if (*first != *last)
return false;
return true;
}
int palindromes(FILE *fp) {
/** Print lines that are palindromes from the file.
Return 0 if any line was selected, 1 otherwise;
if any error occurs return 2
*/
int ret = NO_MATCH;
char *line = NULL;
size_t line_size = 0; // line size including terminating '\0' if any
ssize_t len = -1; // number of characters read, including '\n' if any,
// . but not including the terminating '\0'
while ((len = getline(&line, &line_size, fp)) != -1) {
if (is_palindrome(line, line + len)) {
if (printf("%s", line) < 0) {
ret = ERROR;
break;
}
else
ret = MATCH;
}
}
if (line)
free(line);
else
ret = ERROR;
if (!feof(fp))
ret = ERROR;
return ret;
}
int main(int argc, char* argv[]) {
int exit_code = NO_MATCH;
if (argc == 1) // no input file; read stdin
exit_code = palindromes(stdin);
else {
// process each input file
FILE *fp = NULL;
int ret = 0;
int i;
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-") == 0)
ret = palindromes(stdin);
else if ((fp = fopen(argv[i], "r")) != NULL) {
ret = palindromes(fp);
fclose(fp);
} else {
fprintf(stderr, "%s: %s: could not open: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
}
if (ret == ERROR) {
fprintf(stderr, "%s: %s: error: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
} else if (ret == MATCH && exit_code != ERROR)
// return MATCH if at least one line is a MATCH, propogate error
exit_code = MATCH;
}
}
return exit_code;
}
Exit status is 0 if any line was selected, 1 otherwise;
if any error occurs, the exit status is 2. It uses GNU getline() that allows arbitrary large lines as an input.

Resources