Following is my code to remove comments from a C program. But the comments don't get removed: it removes /* and */, but doesn't remove the text between these delimiters.
#include <stdio.h>
/*
 * Copy the file named by argv[1] to the file named by argv[2] one character
 * at a time, writing a single space wherever a slash-star or star-slash pair
 * is read.  No state records whether the reader is currently inside a
 * comment, so the text between the two delimiters goes through the final
 * else branch and is copied unchanged.
 */
main(int argc, char *argv[]) {
FILE *fp, *ft;
/* Receives the result of fgetc(), which returns an int. */
char ch;
if (argc < 3) {
printf("No file name given");
}
/* There is no return above: the fopen() calls run even when argc < 3. */
fp = fopen(argv[1], "r");
ft = fopen(argv[2], "w");
/* Failures are only reported; the loop below still uses fp and ft. */
if (fp == NULL)
printf("Opening error");
if (ft == NULL)
printf("Opening error");
while (1) {
ch = fgetc(fp);
if (ch == EOF)
break;
if (ch == '/') {
/* Look at the character after '/'. Unless it is '*', neither the '/'
   nor that following character is written to the output. */
ch = fgetc(fp);
if (ch == '*') {
putc(' ', ft);
}
} else if (ch == '*') {
/* Same pattern for '*': unless the next character is '/', both the '*'
   and the character after it are dropped. */
ch = fgetc(fp);
if (ch == '/') {
putc(' ', ft);
}
} else {
/* Every other character is copied, including those inside a comment. */
putc(ch, ft);
}
}
fclose(fp);
fclose(ft);
}
Please help me to remove comment lines.
There are multiple issues in your code:
the return type of main should not be omitted. Implicit int is obsolete and no longer allowed by the C Standard. The prototype should be int main(int argc, char *argv[])
if command line arguments were not passed to the program, it should exit after printing the error message, which should be output to stderr instead of stdout.
if the input file cannot be open, the program should not create the output file.
if either fopen failed, the program should stop instead of wandering into the realm of undefined behavior.
ch must have type int instead of char for the test (ch == EOF) to behave correctly.
you correctly identify the sequences /* and */ and replace them by a single space, but your code will actually remove all other occurrences of / and * together with the subsequent character, and you do not have any provisions for skipping the characters in between.
main should return 0.
Note also that your method might not identify comments correctly if the sequences /* or */ occur in single line comments or string or character constants. Furthermore, you should also handle escaped newlines (\ at the end of a line) as these may occur between the / and *, concealing the comment start or end sequences.
Here is a modified version that handles these cases:
#include <stdio.h>
/*
 * Return the next byte of the C source stream, transparently dropping every
 * backslash that is immediately followed by a newline (a line continuation).
 * A backslash followed by anything else is returned as '\\' and the byte
 * after it is pushed back so the next call sees it.  Returns EOF at the end
 * of the stream.
 */
int getcpp(FILE *fp) {
    for (;;) {
        int c = getc(fp);
        if (c != '\\')
            return c;

        int after = getc(fp);
        if (after != '\n') {
            /* ordinary backslash: hand the following byte back to the stream */
            ungetc(after, fp);
            return '\\';
        }
        /* backslash-newline pair consumed: keep reading */
    }
}
/*
 * Copy a character or string constant from fp to ft.
 *
 * cch is the opening delimiter that the caller has already read; it is
 * written first, then bytes are copied until the matching delimiter has been
 * written.  A backslash causes the byte after it to be copied without being
 * compared with the delimiter.
 *
 * Returns 0 once the closing delimiter is copied, or EOF if the input ends
 * first.
 */
int skipstr(int cch, FILE *fp, FILE *ft) {
    putc(cch, ft);
    for (;;) {
        int c = getcpp(fp);
        if (c == EOF)
            return EOF;
        putc(c, ft);
        if (c == cch)
            return 0;
        if (c != '\\')
            continue;

        /* escape sequence: copy the escaped byte verbatim */
        c = getcpp(fp);
        if (c == EOF)
            return EOF;
        putc(c, ft);
    }
}
/*
 * Strip comments from the C source file argv[1] and write the result to
 * argv[2].
 *
 * Block comments are replaced by a single space and // comments are removed
 * up to (not including) the newline that ends them.  Character and string
 * constants are copied verbatim through skipstr() so that comment-like text
 * inside them is left alone.
 *
 * Returns 1 when an argument is missing or a file cannot be opened, and 0
 * otherwise (including after an "unterminated ..." diagnostic, which stops
 * the copy early).
 */
int main(int argc, char *argv[]) {
    FILE *fp, *ft;
    int ch;

    if (argc < 3) {
        fprintf(stderr, "Missing arguments. Need input and output filenames\n");
        return 1;
    }
    if ((fp = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Cannot open input file %s\n", argv[1]);
        return 1;
    }
    if ((ft = fopen(argv[2], "w")) == NULL) {
        fprintf(stderr, "Cannot open output file %s\n", argv[2]);
        return 1;
    }
    while ((ch = getcpp(fp)) != EOF) {
        if (ch == '/') {
            if ((ch = getcpp(fp)) == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "unterminated comment\n");
                    break;
                }
                putc(' ', ft);
                continue;
            }
            if (ch == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF)
                    break;
                putc('\n', ft);
                continue;
            }
            /* a plain '/' (division): emit it and keep examining ch */
            putc('/', ft);
            if (ch == EOF) {
                /* input ended right after the '/': never pass EOF to putc() */
                break;
            }
            /* fall through so that a quote after '/' is still parsed as a
               constant, e.g. n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            if (skipstr(ch, fp, ft)) {
                fprintf(stderr, "unterminated string or character constant\n");
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    fclose(fp);
    fclose(ft);
    return 0;
}
Your code correctly identifies the comment start sequence "/*" and the comment end sequence "*/" and deletes them, but it doesn't delete what lies between them (a flag, for instance, should do it):
/*
* flag = 1 if comment detected
* flag = 0 otherwise
*/
if (flag == 0)
{
putc (ch, ft);
}
If you keep your code as it is, it will delete all '/' not only in comment but anywhere in the file. I can think of at least one bad consequence for this (when calling headers such as <sys/time.h>, <sys/stat.h>, <netinet/in.h>, and so on ).
Since the start and end sequences of comment are two characters wide, I would suggest that you use 2 "cursors" to read fp, as if you are reading 2 characters per loop. Hereafter an example (even though it works, for simplicity and readability reasons it doesn't handle edge cases such as non closed comment before EOF, or EOF right after a closing comment sequence).
#include <stdio.h>
/*
 * Copy argv[1] to argv[2] while trying to drop block comments, using a
 * two-character window: ch is the character being decided on and nextc is
 * the one read after it.
 */
int
main (int argc, char *argv[])
{
FILE *fp, *ft;
/* Both receive fgetc() results, which are of type int. */
char ch, nextc;
if (argc < 3)
{
printf ("No file name given");
}
/* No early return above or below: the files are used even after an error. */
fp = fopen (argv[1], "r");
ft = fopen (argv[2], "w");
if (fp == NULL)
printf ("Opening error");
if (ft == NULL)
printf ("Opening error");
nextc = fgetc (fp);
while (nextc != EOF)
{
/* Slide the window forward by one character. */
ch = nextc;
nextc = fgetc (fp);
if ((ch == '/') && (nextc == '*'))
{
nextc = fgetc (fp);
/* On entry ch is still '/' and nextc is the first character after the
   opening delimiter.  The loop continues only while ch is not '*' AND
   nextc is not '/', so it stops as soon as either one of them matches,
   not only when both do. */
while ((ch != '*') && (nextc != '/')) /* unroll until the end of comment*/
{
ch = nextc;
nextc = fgetc (fp);
}
/* Refill the window with the next two characters from the file. */
ch = fgetc (fp);
nextc = fgetc (fp);
}
putc (ch, ft);
}
fclose (fp);
fclose (ft);
return 0;
}
Hope this helps.
Your last else part is writing all characters which are not '/' and '*'.
I have changed your code below. *******Additional line ********* shows the changed parts. Try it and inform me?? good luckk...
#include<stdio.h>
/*
 * Variant of the question's program with a flag that records whether the
 * reader is inside a block comment: flag is set to 1 when a '/' is followed
 * by '*', cleared when a '*' is followed by '/', and a character is written
 * only while flag is 0.
 */
main(int argc,char*argv[])
{
FILE *fp,*ft;
char ch;
int flag=0; //**********************additional line********
if(argc<3)
{
printf("No file name given");
}
fp=fopen(argv[1],"r");
ft=fopen(argv[2],"w");
if(fp==NULL)
printf("Opening error");
if(ft==NULL)
printf("Opening error");
while(1)
{
ch=fgetc(fp);
if(ch==EOF)
break;
if(ch=='/')
{
/* ch is replaced by the character after the '/'. */
ch=fgetc(fp);
if(ch=='*')
{
flag=1; //**********************additional line********
putc(' ',ft);
}
}
else if (ch=='*')
{
/* ch is replaced by the character after the '*'. */
ch=fgetc(fp);
if(ch=='/')
{
flag=0;//**********************additional line********
putc(' ',ft);
}
}
/* ch may by now hold the second character of a pair read above: after a
   closing delimiter it is '/' and flag is already 0, so that '/' is
   written; after a '/' that does not open a comment, only the character
   that followed it is written. */
if(flag==0) //**********************additional line********
{
putc(ch,ft);
}
}
fclose(fp);
fclose(ft);
}
Just change the line in above code
while ((ch != '*') && (nextc != '/'))
to
while (!((ch == '*') && (nextc == '/')))
For this question, most of the answers handle only multi-line comments (/* ... */), but there may be single-line comments (// ...) as well.
So, to handle single-line comments, here is a slight modification of krouis's code.
#include <stdio.h>
/*
 * Copy argv[1] to argv[2] with block comments and // comments removed.
 *
 * The file is scanned with a two-character window (ch, nextc).  A block
 * comment is replaced by one space, which is what the C translation phases
 * specify, so neighbouring tokens are not glued together.  A // comment is
 * dropped up to, but not including, its terminating newline, so the
 * following source line is not joined to the current one.
 *
 * String and character constants are not recognised: comment-like text
 * inside them is removed as well.
 *
 * Returns 1 on a usage or file-open error, 0 otherwise.
 */
int main (int argc, char *argv[])
{
  FILE *fp, *ft;
  int ch, nextc;   /* int, so that EOF is distinct from every valid byte */

  if (argc < 3)
    {
      fprintf (stderr, "No file name given");
      return 1;
    }
  fp = fopen (argv[1], "r");
  if (fp == NULL)
    {
      fprintf (stderr, "Opening error");
      return 1;
    }
  /* open the output only once the input is known to be readable */
  ft = fopen (argv[2], "w");
  if (ft == NULL)
    {
      fprintf (stderr, "Opening error");
      fclose (fp);
      return 1;
    }

  nextc = fgetc (fp);
  while (nextc != EOF)
    {
      ch = nextc;
      nextc = fgetc (fp);
      if ((ch == '/') && (nextc == '*'))
        {
          /* load the first two characters of the comment body */
          ch = fgetc (fp);
          nextc = (ch == EOF) ? EOF : fgetc (fp);
          /* advance until the closing pair, or give up at end of file */
          while (nextc != EOF && !((ch == '*') && (nextc == '/')))
            {
              ch = nextc;
              nextc = fgetc (fp);
            }
          if (nextc == EOF)
            break;              /* unterminated comment: nothing more to copy */
          putc (' ', ft);
          nextc = fgetc (fp);
          continue;
        }
      else if ((ch == '/') && (nextc == '/'))
        {
          /* skip to the end of the line; the newline stays in nextc and is
             written by the next iteration */
          do
            nextc = fgetc (fp);
          while (nextc != EOF && nextc != '\n');
          continue;
        }
      putc (ch, ft);
    }
  fclose (fp);
  fclose (ft);
  return 0;
}
/* This file is to remove all comments from a c/c++ source file */
/* Modified by John Dai 2020-05-06 */
#include <stdio.h>
/*
 * Copy the hard-coded source file to the hard-coded output file with block
 * comments and // comments removed.
 *
 * The file is scanned with a two-character window (ch, nextc).  After a
 * comment has been skipped the window is refilled and re-examined, so two
 * comments in a row are both removed.  A block comment is replaced by one
 * space; a // comment is dropped up to, but not including, its newline.
 *
 * String and character constants are not recognised: comment-like text
 * inside them is removed as well.
 *
 * Returns 1 if either file cannot be opened, 0 otherwise.
 */
int main (void)
{
  const char *sourceFile = "D:/Temp/MyCfile.cpp"; //your source code
  const char *outputFile = "D:/Temp/MyCfileWoComments.cpp"; //output file
  FILE *fp, *ft;
  int ch, nextc;   /* int, so that EOF is distinct from every valid byte */

  fp = fopen (sourceFile, "r");
  if (fp == NULL) {printf ("Error in opening source file\n"); return 1;}
  /* create the output only once the source is known to be readable */
  ft = fopen (outputFile, "w");
  if (ft == NULL) {printf ("Error in opening output file\n"); fclose (fp); return 1;}

  nextc = fgetc (fp);
  while (nextc != EOF)
    {
      ch = nextc;
      nextc = fgetc (fp);
      if ((ch == '/') && (nextc == '/'))
        {
          /* move to the end of the line; the newline stays in nextc and is
             written by the next iteration */
          do
            nextc = fgetc (fp);
          while (nextc != EOF && nextc != '\n');
          continue;
        }
      if ((ch == '/') && (nextc == '*'))
        {
          /* load the first two characters of the comment body */
          ch = fgetc (fp);
          nextc = (ch == EOF) ? EOF : fgetc (fp);
          /* move to the end of the comment, or stop at end of file */
          while (nextc != EOF && !((ch == '*') && (nextc == '/')))
            {
              ch = nextc;
              nextc = fgetc (fp);
            }
          if (nextc == EOF)
            break;              /* unterminated comment: nothing more to copy */
          putc (' ', ft);
          nextc = fgetc (fp);   /* first character after the comment */
          continue;
        }
      putc (ch, ft);
    }
  fclose (fp);
  fclose (ft);
  return 0;
}
you can try something like this:
#include <stdio.h>
#include <string.h>
#define READ 0
#define SINGLE_LINE_COMMENT 1
#define MULTILINE_COMMENT 2
#define STRING_READ 3
#define CHAR_READ 4
int row = 1;
int col = 0;
int er_line = 0;
int er_col = 0;
/*
 * Copy src to dst one character per call, leaving out comments.
 *
 * The function handles a single character and then calls itself for the
 * next one, so the recursion depth grows with the number of characters read.
 *
 *   flag       current state: READ, SINGLE_LINE_COMMENT, MULTILINE_COMMENT,
 *              STRING_READ or CHAR_READ
 *   prev_char  the character handled by the previous call (used to spot a
 *              quote that follows a backslash)
 *   past_read  characters read since the state last changed; used by the
 *              character-constant length checks
 *
 * The globals row/col follow the current position, and er_line/er_col record
 * where the current comment or constant started, for the error messages.
 *
 * Both files are closed only on the end-of-file path; the early returns
 * taken after an error message leave them open.
 */
void read_source(FILE *src, FILE *dst, int flag, char prev_char, int past_read)
{
/* NOTE(review): feof() only becomes true after a read has hit end-of-file,
   so the call before this branch is taken reads EOF into ch; the ch != EOF
   test near the bottom keeps that value out of dst. */
if (feof(src))
{
if (flag == STRING_READ)
{
printf("Error : non-terminatig string at line :%d col :%d \n", er_line, er_col);
}
if (flag == CHAR_READ)
{
printf("Error : non-terminatig char constant at line :%d col :%d \n", er_line, er_col);
}
if (flag == MULTILINE_COMMENT)
{
printf("Error : comment reach to end of file at line :%d col :%d \n", er_line, er_col);
}
fclose(src);
fclose(dst);
return;
}
/* fgetc() returns int; the result is stored in a char here. */
char ch = fgetc(src);
past_read++;
/* Keep the global line/column counters in step with the input. */
if (ch == '\n')
{
row++;
col = 0;
}
else
{
col++;
}
char next_ch = '\0';
switch (ch)
{
case '\n':
/* A newline ends a // comment; inside a string or character constant it
   is reported as an error and processing stops. */
if (flag == SINGLE_LINE_COMMENT)
{
flag = READ;
past_read = 0;
}
else
{
if (flag == STRING_READ)
{
printf("Error : non-terminatig string at line :%d col :%d \n", er_line, er_col);
return;
}
if (flag == CHAR_READ)
{
printf("Error : non-terminatig char constant at line :%d col :%d \n", er_line, er_col);
return;
}
}
break;
case '/':
/* One character of lookahead decides between "//", a block-comment
   opener and a plain '/'.  The lookahead is pushed back with fseek()
   only in the plain '/' case; when it is '/' or '*' it stays consumed
   even if flag shows a string, character constant or comment. */
next_ch = fgetc(src);
if (next_ch == '/')
{
if (flag != STRING_READ && flag != CHAR_READ && flag != SINGLE_LINE_COMMENT && flag != MULTILINE_COMMENT)
{
flag = SINGLE_LINE_COMMENT;
er_line = row;
er_col = col;
past_read = 0;
}
}
else
{
if (next_ch == '*')
{
if (flag != STRING_READ && flag != CHAR_READ && flag != SINGLE_LINE_COMMENT && flag != MULTILINE_COMMENT)
{
flag = MULTILINE_COMMENT;
er_line = row;
er_col = col;
past_read = 0;
}
}
else
{
fseek(src, -1, SEEK_CUR);
}
}
break;
case '"':
/* A double quote that does not follow a backslash closes the current
   string, or opens one when in the READ state. */
if (prev_char != '\\')
{
if (flag == STRING_READ)
{
flag = READ;
past_read = 0;
}
else
{
if (flag != STRING_READ && flag != CHAR_READ && flag != SINGLE_LINE_COMMENT && flag != MULTILINE_COMMENT)
{
flag = STRING_READ;
er_line = row;
er_col = col;
past_read = 0;
}
}
}
break;
case '\'':
/* Same idea for character constants.  A quote that follows a backslash
   closes the constant only once more than two characters have been read
   since it was opened. */
if (prev_char != '\\')
{
if (flag == CHAR_READ)
{
flag = READ;
past_read = 0;
}
else
{
if (flag != STRING_READ && flag != CHAR_READ && flag != SINGLE_LINE_COMMENT && flag != MULTILINE_COMMENT)
{
flag = CHAR_READ;
er_line = row;
er_col = col;
past_read = 0;
}
}
}
else
{
if (flag == CHAR_READ)
{
if (past_read > 2)
{
flag = READ;
past_read = 0;
}
}
}
break;
case '*':
/* Inside a block comment, '*' followed by '/' ends the comment.  ch is
   set to '\0' so that nothing is written for the closing pair. */
if (flag == MULTILINE_COMMENT)
{
next_ch = fgetc(src);
if (next_ch == '/')
{
ch = '\0';
flag = READ;
past_read = 0;
}
else
{
fseek(src, -1, SEEK_CUR);
}
}
break;
}
//to work with char constant
/* Both branches below report an error and stop once more than three
   characters have been read inside a character constant. */
if (flag == CHAR_READ)
{
if (ch != '\\')
{
if (prev_char != '\\')
{
if (past_read > 3)
{
printf(" Error : non-terminatig char constant at line :%d col :%d\n", er_line, er_col);
return;
}
}
}
else
{
if (past_read > 3)
{
printf(" Error : non-terminatig char constant at line :%d col :%d\n", er_line, er_col);
return;
}
}
}
/* Write the character unless it is inside a comment, was blanked above,
   or is the end-of-file value. */
if (flag != MULTILINE_COMMENT && flag != SINGLE_LINE_COMMENT && ch != '\0' && ch != EOF)
{
fputc(ch, dst);
}
/* Handle the next character; ch becomes its prev_char. */
read_source(src, dst, flag, ch, past_read);
return;
}
/*
 * Entry point: strip comments from argv[1] into argv[2] via read_source().
 *
 * Returns 1 when an argument is missing or a file cannot be opened, so that
 * read_source() is never handed a NULL stream; returns 0 otherwise.
 * read_source() closes both streams itself when it reaches end of file.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    FILE *fp2;

    if (argc < 3)
    {
        fprintf(stderr, "Usage: %s <source file> <output file>\n",
                argc > 0 ? argv[0] : "program");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL)
    {
        printf("Unable to open file %s\n", argv[1]);
        return 1;
    }
    fp2 = fopen(argv[2], "w");
    if (fp2 == NULL)
    {
        printf("Unable to open file %s\n", argv[2]);
        fclose(fp);
        return 1;
    }
    read_source(fp, fp2, READ, '\0', 0);
    return 0;
}
Related
Is there any way to do uppercase to lowercase and vice versa for COMMENTS?
For example:
*** input_file.c ***
#include <stdio.h>
/* My FIRST program */
void main(void) {
printf("Hello world!\n"); // PRINT Message
}
*** output_file.c ***
#include <stdio.h>
/* mY first PROGRAM */
void main(void) {
printf("Hello world!\n"); // print mESSAGE
}
I've seen codes that lowercase to uppercase strings or chars and vice versa for example with functions help, but is there any similar or any other code that does this work for comments? :
#include <stdio.h>
#include <string.h>
/*
 * Read one line from standard input and print it converted to lowercase.
 *
 * fgets() bounds the read to the buffer (gets() cannot, and was removed from
 * the language in C11), and the conversion is done in place because strlwr()
 * is not part of the standard library.  Only the letters 'A'..'Z' are
 * changed.  Returns 1 if no line could be read, 0 otherwise.
 */
int main()
{
    char s[1000];

    printf("Enter the string: ");
    if (fgets(s, sizeof s, stdin) == NULL)
        return 1;
    /* drop the newline that fgets() keeps */
    s[strcspn(s, "\n")] = '\0';
    for (char *p = s; *p != '\0'; p++) {
        if (*p >= 'A' && *p <= 'Z')
            *p += 'a' - 'A';
    }
    printf("string in lowercase ='%s'\n", s);
    return 0;
}
Thank you in advance
Parsing the C syntax is a non trivial task. Here is a small program that strips comments from a C source file. You can modify it to change comments. Hint: start by changing comments to uppercase, then transpose the case of characters.
This program reads the file contents one byte at a time, via a function getcpp that handles the infamous line continuation sequence, \ immediately followed by a newline, and maintains the line number for error messages.
The main() function parses the C syntax including comments, characters constants and string literals. It supports most of the syntax but does not handle trigraphs (an obsolete feature of historical interest only).
As posted, it removes all comments, replacing them with a space or a newline as appropriate. Study the code and see how you can modify it for your purpose. Learning by example is a good method, once you get the program to do what you need, you can try and rewrite one from scratch to hone your skills and make progress.
Here is the code:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Return the next byte of the C source stream, transparently dropping every
 * backslash-newline pair (line continuation).  *lineno_p is incremented for
 * each newline consumed, whether it is returned or dropped.  A backslash
 * followed by anything else is returned as '\\' and the following byte is
 * pushed back.  Returns EOF at the end of the stream.
 */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);
        if (c != '\\') {
            if (c == '\n')
                ++*lineno_p;
            return c;
        }

        int after = getc(fp);
        if (after != '\n') {
            /* ordinary backslash: hand the following byte back */
            ungetc(after, fp);
            return '\\';
        }
        /* escaped newline swallowed, but it still counts as a line */
        ++*lineno_p;
    }
}
/*
 * Strip comments from a C source file.
 *
 * Input is argv[1] if given, otherwise stdin; output is argv[2] if given,
 * otherwise stdout.  A // comment is replaced by a newline and a block
 * comment by a single space.  Character and string constants are copied
 * verbatim so that comment-like text inside them is preserved.  The
 * commented-out putc() calls mark the places where the comment text could
 * be emitted or transformed instead of being dropped.
 *
 * Diagnostics go to stderr with the file name and line number.  Returns 1
 * if a file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;
        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                //putc('/', ft);
                //putc('/', ft);
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n') {
                    // Do something with the comment character
                    //putc(ch, ft);
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                //putc('/', ft);
                //putc('*', ft);
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    // Do something with the comment character
                    //putc(ch, ft);
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft);  /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            if (ch == EOF) {
                /* input ended right after a lone '/': EOF must not reach
                   putc(), which would write it as a byte */
                break;
            }
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte without interpreting it */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
I wrote and tested this program that does what you ask assuming that the only true C-comments either begin with // and end with '\n' or begin with /* and end with */
It's not overly efficient as it only reads and writes one character at a time, but I think the code is pretty easy to understand:
#include <stdio.h>
#include <stdlib.h>
/*
 * Copy argv[1] to argv[2], swapping the case of every ASCII letter that lies
 * inside a // comment or a block comment.
 *
 * The input is scanned with a two-character window: `second` is the
 * character just read and `first` is the one read before it.  Each
 * iteration may modify `second`, then writes `first`, so a character is
 * written one iteration after it was (possibly) case-flipped.
 *
 * NOTE(review): the first character of the file is never inspected as
 * `second`, so a double quote in that position is not seen as opening a
 * string.
 */
int main(int args, char *argv[]){
if(args != 3){ //Ensure the program was run with the proper number of arguments
fprintf(stderr,"USAGE: %s <input file> <output file>\n",argv[0]);
exit(EXIT_FAILURE);
}
FILE *in = fopen(argv[1],"r");
FILE *out = fopen(argv[2],"w");
if(!in || !out){ //Ensure both files opened successfully
fprintf(stderr,in ? "File %s unopenable for writing\n" : "File %s unopenable for reading\n",in ? argv[2] : argv[1]);
exit(EXIT_FAILURE);
}
int first,second;
second = fgetc(in);
if(second == EOF) //Input file is empty
exit(EXIT_SUCCESS);
first = second;
enum {line_comment,multiline_comment, string_text, non_comment} status = non_comment; //Keeps track of what type of text we're reading right now
while((second = fgetc(in)) != EOF){
switch(status){
case line_comment: //Flip the case of every letter until we find a newline
if(second == '\n' && first != '\\') //Allow escaped newlines
status = non_comment;
else if(second >= 'A' && second <= 'Z')
second += 'a'-'A';
else if(second >= 'a' && second <='z')
second -= 'a'-'A';
break;
case multiline_comment: //Flip the case of every letter until we find "*/"
if(first == '*' && second == '/') //We found the end of the comment
status = non_comment;
else if(second >= 'A' && second <= 'Z')
second += 'a'-'A';
else if(second >= 'a' && second <= 'z')
second -= 'a'-'A';
break;
case string_text:
/* Only the single preceding character is checked for a backslash. */
if(second == '"' && first != '\\') //Look for end of string but ignore '\"' as those are allowed in strings
status = non_comment;
break;
case non_comment: //Look for the two-character comment beginnings "//" and "/*"
if(first == '/'){
if(second == '/')
status = line_comment;
else if(second == '*')
status = multiline_comment;
}
else if(second == '"' && first != '\\') //Also check for the beginning of a string
status = string_text;
break;
}
fputc(first,out); //Write last round's possibly-modified char to the output file
first = second;
}
fputc(first,out); //Output the last character of the file
/* Neither stream is closed explicitly; the program ends through exit(). */
exit(EXIT_SUCCESS); //Close all open files
}
I want to partially automate grading of C code (ANSI C99) for a university course. One property I would like to compute is the number of lines per C function (optionally excluding blank and comment lines).
I am aware of several tools that can filter out blank lines and comment lines in a file, but that would only solve half my problem. I want to separate lines that belong to an individual C function.
I have been told a regex will not work. Is there a clever way to use the gcc preprocessor?
Clang has a switch for printing the syntax tree.
For example, if I run
clang -Xclang -ast-dump -fsyntax-only lc.c
on
lc.c:
int main()
{
}
void f()
{
}
I get:
...
|-FunctionDecl 0x558d2c812890 <lc.c:1:1, line:5:1> line:1:5 main 'int ()'
| `-CompoundStmt 0x558d2c812970 <line:2:1, line:5:1>
`-FunctionDecl 0x558d2c8129c8 <line:7:1, line:9:1> line:7:6 f 'void ()'
`-CompoundStmt 0x558d2c812a68 <line:8:1, line:9:1>
If you write a script that extracts the line numbers from those depth=1 CompoundStmt's that are preceded by FunctionDecl's (FunctionDecl + CompoundStmt == function definition) and subtracts them, you get the line lengths of your functions minus 1.
The preprocessor is little more than a tokenizer. You need a proper parser for this.
You can address this problem in 2 steps:
write a C parser that can remove comments
instrument this parser to detect function names and bodies and count the meaningful lines of code. You should consider blank lines and lines consisting only of braces and punctuation ({, }, ,, ;...) meaningless. This will make your count less dependent on the coding style used by the programmer.
Here is help for the first step: a parser that strips comments:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Fetch the next byte of C source from fp, hiding line continuations: a
 * backslash directly followed by a newline is skipped entirely.  Every
 * newline consumed, skipped or returned, adds one to *lineno_p.  A backslash
 * followed by any other byte is returned as '\\' with that byte pushed back
 * onto the stream.  Returns EOF when the stream is exhausted.
 */
int getcpp(FILE *fp, int *lineno_p) {
    int c = getc(fp);

    while (c == '\\') {
        int after = getc(fp);
        if (after != '\n') {
            /* not a continuation: restore the byte and report the backslash */
            ungetc(after, fp);
            return '\\';
        }
        ++*lineno_p;
        c = getc(fp);
    }
    if (c == '\n')
        ++*lineno_p;
    return c;
}
/*
 * Strip comments from a C source file.
 *
 * Input is argv[1] if given, otherwise stdin; output is argv[2] if given,
 * otherwise stdout.  A // comment is replaced by a newline and a block
 * comment by a single space.  Character and string constants are copied
 * verbatim so that comment-like text inside them is preserved.
 *
 * Diagnostics go to stderr with the file name and line number.  Returns 1
 * if a file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;
        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft);  /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            if (ch == EOF) {
                /* input ended right after a lone '/': EOF must not reach
                   putc(), which would write it as a byte */
                break;
            }
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte without interpreting it */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
#include <stdio.h>
#include <stdbool.h>
/*
 * Skip the body of a block comment whose opening delimiter has already been
 * read from fp.  Returns once the closing star-slash pair has been consumed,
 * or when the end of the file is reached.
 */
void m_cmnt(FILE *fp) {
    /* Start from a value that cannot be '*': the original read prev before
       ever assigning it, which is undefined behaviour. */
    int prev = 0;
    int ch;

    while ((ch = getc(fp)) != EOF) {
        if (prev == '*' && ch == '/')
            return;
        prev = ch;
    }
}
/*
 * Copy "test.txt" to "temp.txt", replacing each block comment with a single
 * space.  The input and output names are hard-coded; the command line is
 * used only for the remove() call at the end.
 */
int main(int c, char **arr) {
FILE *fp, *np;
int ch, prev;
/* Toggled by every quote character; while set, input is copied as-is. */
bool String = 0;
fp = fopen("test.txt", "r");
np = fopen("temp.txt", "w");
/* Only the input stream is checked. */
if (fp == NULL) {
printf("Invalid/No Filename given as Argument ! \n");
return 1;
}
while ((ch = getc(fp)) != EOF) {
if (!String) {
if (ch == '/') {
/* Remember the '/' and look at the character after it. */
prev = ch;
ch = getc(fp);
switch (ch) {
case '*':
/* if(ch != 'a') putc('h', np); */
/* Comment opener: skip the body and write one space in its place. */
m_cmnt(fp);
putc(' ', np);
break;
default:
/* Not a comment: write the '/' and the character that followed it. */
putc(prev, np);
putc(ch, np);
break;
}
} else
putc(ch, np);
} else
putc(ch, np);
/* Single and double quotes both flip the same flag. */
if (ch == '\"' || ch == '\'')
String = !String;
prev = ch;
}
fclose(fp);
fclose(np);
/* Deletes the file named by the first command-line argument. */
remove(arr[1]);
//rename("temp.txt", arr[1]);
return 0;
}
This is a simple de-commenting C program (which is C pre-processor job). I was struggling adding a feature to write an error message when detecting unterminated comment (/* example) to the standard error stream. The error should say something like Error: line X: unterminated comment where X is the line number the error occurred. I have been trying this for days now and I can't make any progress and I am highly frustrated. So please someone help me with simple and to the point answer.
test.txt
hello\nworld
Me/*some\ncomment*/again
The result of test.txt after the program run should be like
hello
world
me
again
Both of them are in separate line because \n is present in each case. But what I am getting right now is
hello\nworld
Me again
You can modify the m_cmnt() function to output the error message if it encounters EOF while scanning for */:
/*
 * Skip the body of a block comment whose opening delimiter has already been
 * read from fp.  Returns silently once the closing star-slash pair has been
 * consumed; if the file ends first, reports the unterminated comment on
 * stderr.
 */
void m_cmnt(FILE *fp) {
    int prev = 0;
    int ch;

    while ((ch = getc(fp)) != EOF) {
        if (ch == '/' && prev == '*')
            return;
        prev = ch;
    }
    fprintf(stderr, "error: unterminated comment\n");
}
If you want to output the line number, you must keep track of the line count everywhere.
Note also that you should handle // comments too and parse the strings more accurately, handling escape sequences.
Here is a version with line number handling:
#include <stdio.h>
#include <stdbool.h>
/*
 * Skip the body of a C block comment whose opening delimiter has already
 * been read from fp, adding one to *lineno_p for every newline inside it.
 *
 * Returns the character the caller should write in place of the comment:
 * '\n' if the comment contained at least one newline, ' ' otherwise.
 * Returns EOF if the file ends before the comment is closed.
 */
int m_cmnt(FILE *fp, int *lineno_p) {
    int replacement = ' ';
    int prev = 0;
    int ch;

    while ((ch = getc(fp)) != EOF) {
        if (ch == '\n') {
            replacement = '\n';
            *lineno_p += 1;
        }
        if (ch == '/' && prev == '*')
            return replacement;
        prev = ch;
    }
    return EOF;
}
/*
 * Copy "test.txt" to "temp.txt" with every block comment replaced by the
 * character m_cmnt() returns (a newline if the comment spanned lines,
 * otherwise a space), keeping a running line count for diagnostics.
 *
 * An unterminated comment is reported on stderr with the line where the file
 * ended and the line where the comment started, and stops the copy.
 *
 * Returns 1 if either file cannot be opened, 0 otherwise.
 */
int main(int c, char **arr) {
    FILE *fp, *np;
    int ch;
    bool String = 0;
    const char *filename = "test.txt";
    int lineno = 1;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("cannot open input file %s\n", filename);
        return 1;
    }
    /* create the output only once the input is known to be readable */
    np = fopen("temp.txt", "w");
    if (np == NULL) {
        printf("cannot open output file %s\n", "temp.txt");
        fclose(fp);
        return 1;
    }
    while ((ch = getc(fp)) != EOF) {
        if (ch == '\n')
            lineno++;
        if (!String) {
            if (ch == '/') {
                ch = getc(fp);
                if (ch == '\n')
                    lineno++;
                if (ch == '*') {
                    int startline = lineno;
                    ch = m_cmnt(fp, &lineno);
                    if (ch == EOF) {
                        /* lineno is a plain int: pass it by value */
                        fprintf(stderr, "%s:%d: error: unterminated comment started on line %d\n",
                                filename, lineno, startline);
                        break;
                    }
                    putc(ch, np);
                } else {
                    putc('/', np);
                    if (ch == EOF)
                        break;  /* file ended right after '/': nothing more to write */
                    putc(ch, np);
                }
            } else {
                putc(ch, np);
            }
        } else {
            putc(ch, np);
        }
        if (ch == '\"' || ch == '\'')
            String = !String;
    }
    fclose(fp);
    fclose(np);
    /* arr[1] is only valid when an argument was actually supplied */
    if (c > 1)
        remove(arr[1]);
    //rename("temp.txt", arr[1]);
    return 0;
}
For illustration, here is a more complete program that handles all special cases for character and string constants and escaped newlines:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Read the next byte of C source from fp while hiding line continuations: a
 * backslash directly followed by a newline is dropped and reading goes on.
 * *lineno_p is bumped for each newline consumed, dropped or returned.  Any
 * other backslash is returned as '\\' and the byte after it is pushed back.
 * Returns EOF at the end of the stream.
 */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);
        if (c == '\n')
            *lineno_p += 1;
        if (c != '\\')
            return c;

        int after = getc(fp);
        if (after != '\n') {
            /* ordinary backslash: give the following byte back */
            ungetc(after, fp);
            return '\\';
        }
        *lineno_p += 1;
    }
}
/*
 * Strip comments from a C source file.
 *
 * Input is argv[1] if given, otherwise stdin; output is argv[2] if given,
 * otherwise stdout.  A // comment is replaced by a newline.  A block comment
 * is replaced by a newline if it contained one, otherwise by a single space.
 * Character and string constants are copied verbatim so that comment-like
 * text inside them is preserved.
 *
 * Diagnostics go to stderr with the file name and line number.  Returns 1
 * if a file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;
        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0, replacement = ' ';
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    if (ch == '\n')
                        replacement = '\n';
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                /* a space, or a newline if the comment spanned lines */
                putc(replacement, ft);
                continue;
            }
            putc('/', ft);
            if (ch == EOF) {
                /* input ended right after a lone '/': EOF must not reach
                   putc(), which would write it as a byte */
                break;
            }
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte without interpreting it */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
Right now this code doesn't remove inline comments, how do I change it so it also removes inline comments?
FILE *output;
output = fopen("preprocess_output.c", "w");
while (fgets(line, LINE_LENGTH, file) != NULL)
{
for (int i = 0; i < strlen(line); i++)
{
if (line[i] == '/' && line[i + 1] == '/')
{
comment_lines++;
}
else
{
fprintf(output, line);
}
if (line[i] != '\n' && line[i] != '\t')
{
non_blank++;
break;
}
}
}
Here is a small program that strips C comments in almost all cases.
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Return the next byte of C source from fp with line continuations removed:
 * each backslash that is immediately followed by a newline is discarded
 * together with that newline.  *lineno_p grows by one per newline consumed,
 * discarded or returned.  A backslash before any other byte is returned as
 * '\\', and that byte is pushed back for the next call.  Returns EOF at the
 * end of the stream.
 */
int getcpp(FILE *fp, int *lineno_p) {
    int c;

    for (c = getc(fp); c == '\\'; c = getc(fp)) {
        int after = getc(fp);
        if (after != '\n') {
            /* plain backslash: un-read what followed it */
            ungetc(after, fp);
            return '\\';
        }
        *lineno_p += 1;
    }
    if (c == '\n')
        *lineno_p += 1;
    return c;
}
/*
 * Strip comments from a C source file.
 *
 * Input is argv[1] if given, otherwise stdin; output is argv[2] if given,
 * otherwise stdout.  A // comment is replaced by a newline and a block
 * comment by a single space.  Character and string constants are copied
 * verbatim so that comment-like text inside them is preserved.
 *
 * Diagnostics go to stderr with the file name and line number.  Returns 1
 * if a file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;
        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft);  /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            if (ch == EOF) {
                /* input ended right after a lone '/': EOF must not reach
                   putc(), which would write it as a byte */
                break;
            }
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte without interpreting it */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
Since you get a full answer for free, try and learn how the above code handles strings and escaped newlines. There are still some corner cases that are not supported, can you find them?
one such corner case is the code does not parse trigraphs, an obsolescent feature that may be used to hide \ characters.
in the following solution, there is a single pass over the line. If a comment was found (//), we terminate and print it. supporting (/* */) requires more work.
/* Copy the input line by line, cutting each line at the first "//". */
while (fgets(line, LINE_LENGTH, file) != NULL)
{
    size_t len = strlen(line);
    size_t i;

    /* i + 1 < len keeps both characters of the pair inside the text read */
    for (i = 0; i + 1 < len; i++)
    {
        if (line[i] == '/' && line[i + 1] == '/')
        {
            /* Drop the comment but keep a line break, so that the next
               source line is not appended to this one. */
            line[i] = '\n';
            line[i + 1] = '\0';
            break;
        }
    }
    fprintf(output, "%s", line);
}
Note two points in addition to the logic:
when printing using printf or fprintf, always use a format string. If the line contains % it might do unexpected things.
do not put strlen in the condition of a loop. It may be re-evaluated on every iteration, which needlessly rescans the string to compute its length each time.
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Count the words in "input.txt".
 *
 * A word is a maximal run of characters other than space, tab, carriage
 * return and newline, so repeated separators and a missing final newline do
 * not change the result.
 *
 * Prints a message and terminates the program if the file cannot be opened.
 * Returns the number of words found.
 */
int word_count()
{
    FILE *p;
    int ch;          /* int, so that EOF is distinct from every valid byte */
    int w = 0;
    int in_word = 0; /* non-zero while the scan is inside a word */

    p = fopen("input.txt", "r");
    if (p == NULL)
    {
        printf("Error opening file\n");
        exit(0);
    }
    while ((ch = fgetc(p)) != EOF)
    {
        if (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r')
        {
            in_word = 0;
        }
        else if (!in_word)
        {
            /* first character of a new word */
            in_word = 1;
            w++;
        }
    }
    /* close before returning; the original returned first and leaked p */
    fclose(p);
    return w;
}
/* One distinct word and the number of times it has been seen. */
struct word_node {
    char *text;
    int count;
    struct word_node *next;   /* next entry in the same hash bucket */
};

/* MAX_WORD must match the field width in the fscanf() format in main(). */
enum { TABLE_SIZE = 1024, MAX_WORD = 255 };

/* Hash a NUL-terminated word (multiply-by-33 string hash). */
static unsigned long hash_word(const char *s)
{
    unsigned long h = 5381;

    while (*s != '\0')
        h = h * 33 + (unsigned char)*s++;
    return h;
}

/*
 * Record one occurrence of word in table, adding a new entry the first time
 * the word is seen.  Returns 0 on success, -1 if memory runs out.
 */
static int count_word(struct word_node **table, const char *word)
{
    struct word_node **slot = &table[hash_word(word) % TABLE_SIZE];
    struct word_node *node;

    for (node = *slot; node != NULL; node = node->next) {
        if (strcmp(node->text, word) == 0) {
            node->count++;
            return 0;
        }
    }
    node = malloc(sizeof *node);
    if (node == NULL)
        return -1;
    node->text = malloc(strlen(word) + 1);
    if (node->text == NULL) {
        free(node);
        return -1;
    }
    strcpy(node->text, word);
    node->count = 1;
    node->next = *slot;
    *slot = node;
    return 0;
}

/*
 * Read whitespace-separated words from "input.txt", count how often each one
 * occurs using a chained hash table, and write one "word / count" line per
 * distinct word to "output.txt" below a header line.  Words longer than
 * MAX_WORD characters are split.  The words are written in hash-table order,
 * not in order of first appearance.
 *
 * Returns 0 on success, 1 if a file cannot be opened, memory runs out, or
 * the output cannot be written completely.
 */
int main(void)
{
    static struct word_node *table[TABLE_SIZE];   /* zero-initialised */
    char word[MAX_WORD + 1];
    FILE *fp1, *fp2;
    int status = 0;
    size_t i;

    fp1 = fopen("input.txt", "r");
    if (fp1 == NULL)
    {
        printf("Error opening file\n");
        return 1;
    }
    fp2 = fopen("output.txt", "w");
    if (fp2 == NULL)
    {
        printf("Error opening file\n");
        fclose(fp1);
        return 1;
    }

    while (fscanf(fp1, "%255s", word) == 1)
    {
        if (count_word(table, word) != 0)
        {
            printf("Out of memory\n");
            status = 1;
            break;
        }
    }

    fprintf(fp2, "Words\t\t occurrences\t\n");
    for (i = 0; i < TABLE_SIZE; i++)
    {
        struct word_node *node = table[i];

        while (node != NULL)
        {
            struct word_node *next = node->next;

            fprintf(fp2, "%s\t\t %d\n", node->text, node->count);
            free(node->text);
            free(node);
            node = next;
        }
    }

    fclose(fp1);
    /* fclose() flushes the output; a failure means the report is incomplete */
    if (fclose(fp2) != 0)
        status = 1;
    return status;
}
I've been asked by my professor to write a c code that would read a file and find the occurrences of each word. here's the code i tried but it requires that I use something better than pointers, may be a hashtable and keep track of all the words and count their occurrences.
I'm having trouble choosing the right datastructures and since im just a beginner, i need help choosing the datastructures.