I have a main.c file containing one or more preprocessor macros defined:
#include <stdio.h>
#define VALUE 12
/* Prints the version string and the numeric value.
 * VERSION is not defined in the snippet shown; it has to be supplied from
 * outside (for example with -DVERSION=... on the compiler command line). */
int main(void) {
printf("This file is in version %s and contains value %d\n", VERSION, VALUE);
return 0;
}
I want to export a main2.c file with only the #define VERSION "1.0" applied to the original source file.
What I tried:
gcc -DVERSION=\"1.0\" -E will apply ALL the preprocessor directives instead of the single one I want
sed 's/VERSION/\"1.0\"/g' will probably replace more than needed, and will need more work if I need more than a single directive
cppp is a nice tool but may alter the source file a lot. Only supports simple defines with numerical values
Is there any way to execute only parts of preprocessor directives with gcc ?
Partial preprocessing is a nifty idea and exactly what you are looking for. The cppp utility by Brian Raiter only handles #ifdef and #ifndef lines, it does not perform macro substitution as you require.
Here is a utility I just wrote for this purpose: you can define any number of identifiers on the command line with -Didentifier (expands to 1) or -Didentifier= (expands to nothing), -Didentifier=str or simply identifier=str.
It will substitute identifiers only, preserving comments and strings. Some corner cases are not handled, although this should not be a problem in practice:
no support for non ASCII identifiers.
stdio in #include <stdio.h> will be seen as an identifier that can be substituted.
some numbers will be parsed as 3 tokens: 1.0E+1.
identifiers will not be substituted if they are split on multiple lines with escaped newlines
defining include, ifdef and other preprocessing directives will cause them to be substituted, unlike the C preprocessor
macro argument names may be substituted whereas the C preprocessor would preserve them.
pcpp.c:
/* Partial preprocessing by chqrlie */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One command-line definition (name[=value]), kept in a singly linked list. */
typedef struct define_t {
/* next definition in the list, or NULL */
struct define_t *next;
/* number of bytes of `tok` that form the identifier name */
size_t len;
/* start of the name; NOT NUL-terminated at `len` (it points into the
 * original "name=value" argument string) */
const char *tok;
/* replacement text written in place of the identifier */
const char *def;
} define_t;
/* Allocate `size` bytes with malloc(). On failure print a diagnostic to
 * stderr and terminate the program with status 1, so callers never see NULL. */
static void *xmalloc(size_t size) {
    void *block = malloc(size);

    if (block != NULL)
        return block;

    fprintf(stderr, "pcpp: cannot allocate memory\n");
    exit(1);
}
/* Push a new definition parsed from `str` onto the front of the list *defsp.
 * `str` has the form "name" or "name=value"; a bare name expands to "1" and
 * "name=" expands to the empty string. `str` is referenced, not copied. */
static void add_define(define_t **defsp, const char *str) {
    size_t name_len = strcspn(str, "=");
    define_t *node = xmalloc(sizeof(*node));

    node->tok = str;
    node->len = name_len;
    if (str[name_len] == '\0')
        node->def = "1";                    /* no '=': default expansion */
    else
        node->def = str + name_len + 1;     /* text following the '=' */

    node->next = *defsp;
    *defsp = node;
}
/* Reader state: the input stream plus a growable buffer holding the bytes
 * read since the last flush. */
struct context {
/* input stream */
FILE *fp;
/* current line number, advanced each time a newline is read */
int lineno;
/* size: allocated capacity of buf; pos: number of bytes currently stored */
size_t size, pos;
/* pending bytes (NULL until the first byte is appended) */
char *buf;
};
/* Append byte `ch` to the pending buffer of `ctx`, growing the buffer by
 * roughly 1.5x when it is full. Returns `ch`. Allocation failure terminates
 * the program inside xmalloc(). */
static int append_char(struct context *ctx, int ch) {
    if (ctx->pos == ctx->size) {
        size_t new_size = ctx->size + ctx->size / 2 + 32;
        char *new_buf = xmalloc(new_size);

        /* ctx->buf is NULL before the first growth; passing a null pointer
         * to memcpy is undefined behavior even for a zero length, so only
         * copy when there is something to copy. */
        if (ctx->pos > 0)
            memcpy(new_buf, ctx->buf, ctx->pos);
        free(ctx->buf);
        ctx->buf = new_buf;
        ctx->size = new_size;
    }
    ctx->buf[ctx->pos++] = (char)ch;
    return ch;
}
/* Write all pending bytes of `ctx` to `ft` and empty the buffer.
 * Does nothing when the buffer is already empty. */
static void flush_context(struct context *ctx, FILE *ft) {
    if (ctx->pos == 0)
        return;

    fwrite(ctx->buf, ctx->pos, 1, ft);
    ctx->pos = 0;
}
/* Read the next logical byte from the C source file, handling escaped
 * newlines: a backslash immediately followed by a newline is skipped (but
 * still recorded in the pending buffer so it is copied to the output).
 * Every byte consumed is appended to ctx->buf; ctx->lineno is advanced for
 * each newline read. Returns the byte, or EOF at end of input. */
static int getcpp(struct context *ctx) {
int ch;
while ((ch = getc(ctx->fp)) == '\\') {
append_char(ctx, ch);
if ((ch = getc(ctx->fp)) != '\n') {
/* plain backslash: push the following byte back and return the '\\'
 * (ungetc(EOF) is a no-op, so a trailing backslash is handled too) */
ungetc(ch, ctx->fp);
return '\\';
}
/* escaped newline: keep it in the buffer and continue with the next byte */
append_char(ctx, ch);
ctx->lineno += 1;
}
/* EOF is never stored in the buffer */
if (ch != EOF)
append_char(ctx, ch);
if (ch == '\n')
ctx->lineno += 1;
return ch;
}
/* Undo the last getcpp(): push `ch` back onto the input stream and drop it
 * from the pending buffer. A no-op for EOF or when the buffer is empty. */
static void ungetcpp(struct context *ctx, int ch) {
    if (ch == EOF || ctx->pos == 0)
        return;

    ungetc(ch, ctx->fp);
    ctx->pos--;
}
/* Copy the C source read from `fp` to `outname` (or stdout when `outname` is
 * NULL), replacing every identifier that matches an entry of `defs` by its
 * replacement text. Comments, string/character constants and numbers are
 * copied unchanged. `filename` is only used in diagnostics.
 * Returns 1 if the output file cannot be opened, 0 otherwise. */
static int preprocess(const char *filename, FILE *fp, const char *outname, define_t *defs) {
FILE *ft = stdout;
int ch;
/* one-element array so that `ctx` can be used directly as a pointer */
struct context ctx[1] = {{ fp, 1, 0, 0, NULL }};
if (outname) {
if ((ft = fopen(outname, "w")) == NULL) {
fprintf(stderr, "pcpp: cannot open output file %s: %s\n",
outname, strerror(errno));
return 1;
}
}
/* Each iteration scans one token into ctx->buf and then flushes it. */
while ((ch = getcpp(ctx)) != EOF) {
int startline = ctx->lineno;
if (ch == '/') {
if ((ch = getcpp(ctx)) == '/') {
/* single-line comment */
while ((ch = getcpp(ctx)) != EOF && ch != '\n')
continue;
if (ch == EOF) {
/* only a warning: the comment text is still copied to the output */
fprintf(stderr, "%s:%d: unterminated single line comment\n",
filename, startline);
//break;
}
//putc('\n', ft); /* replace comment with newline */
flush_context(ctx, ft);
continue;
}
if (ch == '*') {
/* multi-line comment */
int lastc = 0;
while ((ch = getcpp(ctx)) != EOF) {
if (ch == '/' && lastc == '*') {
break;
}
lastc = ch;
}
if (ch == EOF) {
fprintf(stderr, "%s:%d: unterminated comment\n",
filename, startline);
//break;
}
//putc(' ', ft); /* replace comment with single space */
flush_context(ctx, ft);
continue;
}
/* plain '/' operator: give back the byte that followed it so it is
 * tokenized on its own, except for '=' which is emitted with the '/' */
if (ch != '=') {
ungetcpp(ctx, ch);
}
flush_context(ctx, ft);
continue;
}
if (ch == '\'' || ch == '"') {
/* character or string constant: copied verbatim up to the closing quote */
int sep = ch;
const char *const_type = (ch == '"') ? "string" : "character";
while ((ch = getcpp(ctx)) != EOF) {
if (ch == sep)
break;;
if (ch == '\\') {
/* skip the escaped byte so an escaped quote does not end the constant */
if ((ch = getcpp(ctx)) == EOF)
break;
}
if (ch == '\n') {
fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
filename, ctx->lineno - 1, const_type);
/* This is a syntax error but keep going as if constant was terminated */
break;
}
}
if (ch == EOF) {
fprintf(stderr, "%s:%d: unterminated %s constant\n",
filename, startline, const_type);
}
flush_context(ctx, ft);
continue;
}
if (ch == '_' || isalpha(ch)) {
/* identifier or keyword */
define_t *dp;
while (isalnum(ch = getcpp(ctx)) || ch == '_')
continue;
/* give back the byte that ended the identifier; after this the buffer
 * holds exactly the identifier bytes (EOF was never buffered) */
ungetcpp(ctx, ch);
for (dp = defs; dp; dp = dp->next) {
if (dp->len == ctx->pos && !memcmp(dp->tok, ctx->buf, ctx->pos)) {
/* matching symbol */
/* write the replacement and discard the buffered identifier */
fputs(dp->def, ft);
ctx->pos = 0;
break;
}
}
/* no match: the identifier is still buffered and is written unchanged */
flush_context(ctx, ft);
continue;
}
if (ch == '.' || isdigit(ch)) {
/* preprocessing number: should parse precise syntax */
/* letters are consumed as part of the number, so suffixes and hex
 * digits are not mistaken for identifiers */
while (isalnum(ch = getcpp(ctx)) || ch == '.')
continue;
ungetcpp(ctx, ch);
flush_context(ctx, ft);
continue;
}
/* any other byte (punctuation, whitespace) is copied as is */
flush_context(ctx, ft);
}
if (outname) {
fclose(ft);
}
free(ctx->buf);
return 0;
}
/* Command-line driver for pcpp.
 *
 * Arguments are handled in order:
 *   -h, -?, --help     print usage and exit with status 2
 *   -o FILE / -oFILE   write output to FILE instead of stdout
 *   -Dname[=value]     add a definition (also accepted as "-D name[=value]")
 *   name=value         add a definition
 *   anything else      input file, processed immediately with the options
 *                      seen so far
 * With no input file, stdin is processed.
 *
 * Returns 0 on success, 1 on a usage or I/O error (including a failure
 * reported by preprocess(), which was previously ignored). */
int main(int argc, char *argv[]) {
    char *filename = NULL;
    char *outname = NULL;
    define_t *defs = NULL;
    int status = 0;
    int i;

    for (i = 1; i < argc; i++) {
        char *arg = argv[i];

        if (*arg == '-') {
            if (arg[1] == 'h' || arg[1] == '?' || !strcmp(arg, "--help")) {
                printf("usage: pcpp [-o FILENAME] [-Dname[=value]] ... [FILE] ...\n");
                return 2;
            } else if (arg[1] == 'o') {
                if (arg[2]) {
                    outname = arg + 2;
                } else if (i + 1 < argc) {
                    outname = argv[++i];
                } else {
                    fprintf(stderr, "pcpp: missing filename for -o\n");
                    return 1;
                }
            } else if (arg[1] == 'D') {
                if (arg[2]) {
                    add_define(&defs, arg + 2);
                } else if (i + 1 < argc) {
                    add_define(&defs, argv[++i]);
                } else {
                    fprintf(stderr, "pcpp: missing definition for -D\n");
                    return 1;
                }
            } else {
                fprintf(stderr, "pcpp: bad option: %s\n", arg);
                return 1;
            }
        } else if (strchr(arg, '=')) {
            add_define(&defs, arg);
        } else {
            FILE *fp;

            filename = arg;
            if ((fp = fopen(filename, "r")) == NULL) {
                fprintf(stderr, "pcpp: cannot open input file %s: %s\n",
                        filename, strerror(errno));
                return 1;
            }
            /* remember the failure but keep processing remaining files */
            if (preprocess(filename, fp, outname, defs) != 0)
                status = 1;
            fclose(fp);
        }
    }
    if (!filename) {
        if (preprocess("<stdin>", stdin, outname, defs) != 0)
            status = 1;
    }
    return status;
}
EDIT: This is a non maintainable solution - but it works. Don't use this if you expect your project to grow into several versions over time.
My attempt makes use of preprocessor conditional code and string concatenation (the fact that in C you can write "abc" "def" and it will be treated as "abcdef").
#include <stdio.h>
#ifdef V1
#define VERSION "1"
#define VALUE 99
#else
#define VERSION "2"
#define VALUE 66
#endif
/* VERSION is a string literal macro, so it is pasted into the format string
 * by adjacent string literal concatenation; VALUE is passed for the %d. */
int main(void) {
printf("This file is in version " VERSION " and contains value %d\n", VALUE);
return 0;
}
which prints
>> ~/playground/so$ gcc -DV1 q1.c
>> ~/playground/so$ ./a.out
This file is in version 1 and contains value 99
>> ~/playground/so$ gcc -DV2 q1.c
>> ~/playground/so$ ./a.out
This file is in version 2 and contains value 66
Read about autoconf https://www.gnu.org/software/autoconf/
and maybe even about automake (if you want to generate makefiles) https://www.gnu.org/software/automake/.
Related
I want to partially automate grading of C code (ANSI C99) for a university course. One property I would like to compute is the number of lines per C function (optionally excluding blank and comment lines).
I am aware of several tools that can filter out blank lines and comment lines in a file, but that would only solve half my problem. I want to separate lines that belong to an individual C function.
I have been told a regex will not work. Is there a clever way to use the gcc preprocessor?
Clang has a switch for printing the syntax tree.
For example, if I run
clang -Xclang -ast-dump -fsyntax-only lc.c
on
lc.c:
int main()
{
}
void f()
{
}
I get:
...
|-FunctionDecl 0x558d2c812890 <lc.c:1:1, line:5:1> line:1:5 main 'int ()'
| `-CompoundStmt 0x558d2c812970 <line:2:1, line:5:1>
`-FunctionDecl 0x558d2c8129c8 <line:7:1, line:9:1> line:7:6 f 'void ()'
`-CompoundStmt 0x558d2c812a68 <line:8:1, line:9:1>
If you write a script that extracts the line numbers from those depth=1 CompoundStmt's that are preceded by FunctionDecl's (FunctionDecl + CompoundStmt == function definition) and subtracts them, you get the line lengths of your functions minus 1.
The preprocessor is little more than a tokenizer. You need a proper parser for this.
You can address this problem in 2 steps:
write a C parser that can remove comments
instrument this parser to detect function names and bodies and count the meaningful lines of code. You should consider blank lines and lines consisting only of braces and punctuation meaningless ({, }, ,, ;...). This will make your count less dependent on the coding style used by the programmer.
Here is help for the first step: a parser that strips comments:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/* Read the next byte from the C source file, handling escaped newlines:
 * a backslash immediately followed by a newline is skipped entirely.
 * *lineno_p is incremented for every newline consumed (escaped or not).
 * Returns the byte read, or EOF at end of input. */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);

        if (c != '\\') {
            if (c == '\n')
                ++*lineno_p;
            return c;
        }

        int next = getc(fp);
        if (next != '\n') {
            /* ordinary backslash: give the following byte back */
            ungetc(next, fp);
            return '\\';
        }
        /* backslash-newline: count the line and keep reading */
        ++*lineno_p;
    }
}
/* Strip C comments.
 * usage: prog [INPUT [OUTPUT]]  (defaults: stdin, stdout)
 * A // comment is replaced by a newline and a block comment by a single
 * space; string and character constants are copied verbatim so that comment
 * markers inside them are preserved. Diagnostics go to stderr.
 * Returns 1 if a file cannot be opened, 0 otherwise. */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            if (fp != stdin)
                fclose(fp);
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;

        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft);  /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            /* '/' was the last byte of the input: stop here, otherwise the
             * code below would write EOF converted to a byte (0xFF) */
            if (ch == EOF)
                break;
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte so \" or \' does not end the constant */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
Stack Overflow! I am on my learning process with the C technology. I have a function which gets an input file, seeks through the file and writes the contents to the output file without the comments.
The function works, but it also breaks in some cases.
My Function:
/* Copy the file named `input` to the file named `output`, dropping the text
 * of // comments.
 * NOTE(review): as posted, `ouput` below is not declared (the parameter is
 * `output`), the fopen() results are not checked, and `c` is a char although
 * fgetc() returns an int. */
void removeComments(char* input, char* output)
{
FILE* in = fopen(input,"r");
FILE* out = fopen(ouput,"w");
char c;
while((c = fgetc(in)) != EOF)
{
if(c == '/')
{
c = fgetc(in);
if(c == '/')
{
/* skip the rest of the line; the terminating '\n' is consumed here and
 * is not written to `out` */
while((c = fgetc(in)) != '\n');
}
else
{
/* only the '/' is written; the character read after it is not */
fputc('/', out);
}
}
else
{
fputc(c,out);
}
}
fclose(in);
fclose(out);
}
But when I give a file like this as input:
// Parameters: a, the first integer; b the second integer.
// Returns: the sum.
int add(int a, int b)
{
return a + b; // An inline comment.
}
int sample = sample;
When removing the inline comment it fails to reach the '\n' for some reason and it gives output:
int add(int a, int b)
{
return a + b; }
int sample = sample;
[EDIT]
Thanks for helping me! It works with the case I posted, but it breaks in another.
Current code:
FILE* in = fopen(input,"r");
FILE* out = fopen(output,"w");
if (in == NULL) {
printf("cannot read %s\n", input);
return; /* change signature to return 0 ? */
}
if (out == NULL) {
printf("cannot write in %s\n", output);
return; /* change signature to return 0 ? */
}
int c;
int startline = 1;
while((c = fgetc(in)) != EOF)
{
if(c == '/')
{
c = fgetc(in);
if(c == '/')
{
while((c = fgetc(in)) != '\n')
{
if (c == EOF) {
fclose(in);
fclose(out);
return; /* change signature to return 1 ? */
}
}
if (! startline)
fputc('\n', out);
startline = 1;
}
else if (c == EOF)
break;
else {
fputc('/', out);
startline = 0;
}
}
else
{
fputc(c,out);
startline = (c == '\n');
}
}
fclose(in);
fclose(out);
When the file contains division the second variable disappears.
Example:
int divide(int a, int b)
{
return a/b;
}
It gives back:
int divide(int a, int b)
{
return a/;
}
after
while((c = fgetc(in)) != '\n');
you need a fputc('\n', out);
Additional remarks :
In
char c;
while((c = fgetc(in)) != EOF)
c must be an int to manage EOF
Just a typo : ouput must be output to compile
You do not handle EOF correctly after reading a '/'
You missed to check the result of the fopen
A proposal :
#include <stdio.h>
/* Copy the file named `input` to the file named `output`, replacing each
 * // comment (up to and including its newline) by a single newline.
 * Comment markers inside strings and block comments are not recognised.
 * Prints a message and returns early if either file cannot be opened; the
 * output file is only created/truncated once the input is known to be
 * readable. */
void removeComments(char* input, char* output)
{
    FILE* in = fopen(input, "r");
    if (in == NULL) {
        printf("cannot read %s\n", input);
        return; /* change signature to return 0 ? */
    }

    FILE* out = fopen(output, "w");
    if (out == NULL) {
        printf("cannot write in %s\n", output);
        fclose(in);
        return; /* change signature to return 0 ? */
    }

    int c;

    while ((c = fgetc(in)) != EOF) {
        if (c != '/') {
            fputc(c, out);
            continue;
        }

        c = fgetc(in);
        if (c == '/') {
            /* skip the comment text */
            while ((c = fgetc(in)) != '\n') {
                if (c == EOF) {
                    fclose(in);
                    fclose(out);
                    return; /* change signature to return 1 ? */
                }
            }
            fputc('\n', out);
        }
        else if (c == EOF) {
            fputc('/', out);
            break;
        }
        else {
            /* a lone '/': emit it AND the character that followed it.
             * Both writes must stay inside this branch, otherwise the
             * newline ending a comment is written twice. */
            fputc('/', out);
            fputc(c, out);
        }
    }

    fclose(in);
    fclose(out);
    /* change signature to return 1 ? */
}
/* usage: prog INPUT OUTPUT
 * Returns 1 when the two file names are not supplied, 0 otherwise. */
int main(int argc, char ** argv)
{
    if (argc < 3) {
        fprintf(stderr, "usage: %s input output\n",
                argc > 0 ? argv[0] : "removeComments");
        return 1;
    }
    removeComments(argv[1], argv[2]);
    return 0;
}
As Tormund Giantsbane says in a remark it is better to completely remove the line containing only a comment (comment starting on the first column), that new proposal does that :
#include <stdio.h>
/* Copy the file named `input` to the file named `output` without its
 * // comments. A comment that follows code on the same line is replaced by
 * a newline; a line that consists only of a comment starting in the first
 * column is removed entirely.
 * Comment markers inside strings and block comments are not recognised.
 * Prints a message and returns early if either file cannot be opened; the
 * output file is only created/truncated once the input is known to be
 * readable. */
void removeComments(char* input, char* output)
{
    FILE* in = fopen(input, "r");
    if (in == NULL) {
        printf("cannot read %s\n", input);
        return; /* change signature to return 0 ? */
    }

    FILE* out = fopen(output, "w");
    if (out == NULL) {
        printf("cannot write in %s\n", output);
        fclose(in);
        return; /* change signature to return 0 ? */
    }

    int c;
    int startline = 1;  /* non-zero while nothing was written on the current line */

    while ((c = fgetc(in)) != EOF) {
        if (c != '/') {
            fputc(c, out);
            startline = (c == '\n');
            continue;
        }

        c = fgetc(in);
        if (c == '/') {
            /* skip the comment text */
            while ((c = fgetc(in)) != '\n') {
                if (c == EOF) {
                    fclose(in);
                    fclose(out);
                    return; /* change signature to return 1 ? */
                }
            }
            /* keep the line break only if the line had other content */
            if (!startline)
                fputc('\n', out);
            startline = 1;
        }
        else if (c == EOF) {
            fputc('/', out);
            break;
        }
        else {
            fputc('/', out);
            fputc(c, out);
            startline = 0;
        }
    }

    fclose(in);
    fclose(out);
    /* change signature to return 1 ? */
}
/* usage: prog INPUT OUTPUT
 * Returns 1 when the two file names are not supplied, 0 otherwise. */
int main(int argc, char ** argv)
{
    if (argc < 3) {
        fprintf(stderr, "usage: %s input output\n",
                argc > 0 ? argv[0] : "removeComments");
        return 1;
    }
    removeComments(argv[1], argv[2]);
    return 0;
}
Compilation and execution :
pi#raspberrypi:/tmp $ gcc -pedantic -Wextra -g r.c
pi#raspberrypi:/tmp $ cat i
// Parameters: a, the first integer; b the second integer.
// Returns: the sum.
int add(int a, int b)
{
return a + b/c; // An inline comment.
}
int sample = sample;
pi#raspberrypi:/tmp $ ./a.out i o
pi#raspberrypi:/tmp $ cat o
int add(int a, int b)
{
return a + b/c;
}
int sample = sample;
As said by DavidC. in a remark if // is placed in a string the result will not be the expected one, it is also the case in a character even illegal (I mean '//' must not be changed), what about the C comments (/* .. // ... */) etc
When removing the inline comment it fails to reach the '\n' for some reason
Well no, if it failed to reach or see the newline at the end of an inline comment then the program would, presumably, consume the entire rest of the file. What it actually fails to do is write such newlines to the output.
Consider your comment-eating code:
while((c = fgetc(in)) != '\n');
That loop terminates when a newline is read. At that point, the newline, having already been read, is not available to be read from the input again, so your general read / write provisions will not handle it. If you want the such newlines to be preserved, then you need to print them in the comment-handling branch.
Additional notes:
fgetc returns an int, not a char, and you need to handle it as such in order to be able to correctly detect end-of-file.
Your program will go into an infinite loop if the input ends with an inline comment that is not terminated by a newline. Such source is technically non-conforming, but even so, you ought to handle it.
#include <stdio.h>
#include <stdbool.h>
/* Skip the body of a block comment: consume bytes from `fp` up to and
 * including the closing star-slash, or up to end of file if the comment is
 * never closed. The opening marker must already have been consumed. */
void m_cmnt(FILE *fp) {
    int prev = 0;   /* must start defined: it is compared before any byte is stored */
    int ch;

    while ((ch = getc(fp)) != EOF) {
        if (prev == '*' && ch == '/')
            return;
        prev = ch;
    }
}
/* Copy "test.txt" to "temp.txt", replacing each block comment by one space.
 * The file names are hard-coded; the command-line arguments are only used
 * by the remove() call at the end.
 * NOTE(review): `np` is never checked for NULL, and arr[1] is used without
 * checking the argument count `c`. */
int main(int c, char **arr) {
FILE *fp, *np;
int ch, prev;
/* toggled on every quote character seen, to track "inside a literal" */
bool String = 0;
fp = fopen("test.txt", "r");
np = fopen("temp.txt", "w");
if (fp == NULL) {
printf("Invalid/No Filename given as Argument ! \n");
return 1;
}
while ((ch = getc(fp)) != EOF) {
if (!String) {
if (ch == '/') {
/* look at the byte after '/' to decide whether a comment starts */
prev = ch;
ch = getc(fp);
switch (ch) {
case '*':
/* if(ch != 'a') putc('h', np); */
m_cmnt(fp);
putc(' ', np);
break;
default:
/* not a comment: write the '/' and the byte that followed it */
putc(prev, np);
putc(ch, np);
break;
}
} else
putc(ch, np);
} else
putc(ch, np);
/* no escape handling: \" and \' also toggle the state */
if (ch == '\"' || ch == '\'')
String = !String;
prev = ch;
}
fclose(fp);
fclose(np);
/* deletes the file named by the first command-line argument */
remove(arr[1]);
//rename("temp.txt", arr[1]);
return 0;
}
This is a simple de-commenting C program (which is C pre-processor job). I was struggling adding a feature to write an error message when detecting unterminated comment (/* example) to the standard error stream. The error should say something like Error: line X: unterminated comment where X is the line number the error occurred. I have been trying this for days now and I can't make any progress and I am highly frustrated. So please someone help me with simple and to the point answer.
test.txt
hello\nworld
Me/*some\ncomment*/again
The result of test.txt after the program run should be like
hello
world
me
again
Both of them are in separate line because \n is present in each case. But what I am getting right now is
hello\nworld
Me again
You can modify the m_cmnt() function to output the error message if it encounters EOF while scanning for */:
/* Skip the body of a block comment: consume bytes from `fp` up to and
 * including the closing star-slash. If end of file is reached first, report
 * the unterminated comment on stderr. */
void m_cmnt(FILE *fp) {
    int last = 0;
    int cur;

    while ((cur = getc(fp)) != EOF) {
        if (last == '*' && cur == '/')
            return;
        last = cur;
    }
    fprintf(stderr, "error: unterminated comment\n");
}
If you want to output the line number, you must keep track of the line count everywhere.
Note also that you should handle // comments too and parse the strings more accurately, handling escape sequences.
Here is a version with line number handling:
#include <stdio.h>
#include <stdbool.h>
/* Skip the body of a C multi-line comment (the opening marker has already
 * been consumed) and advance *lineno_p for each newline inside it.
 * Returns the character that should replace the comment in the output:
 * '\n' if the comment contained a newline, ' ' otherwise; returns EOF if
 * the input ends before the comment is closed. */
int m_cmnt(FILE *fp, int *lineno_p) {
    int filler = ' ';
    int last = 0;
    int cur;

    while ((cur = getc(fp)) != EOF) {
        if (cur == '\n') {
            filler = '\n';
            *lineno_p += 1;
        }
        if (last == '*' && cur == '/')
            return filler;
        last = cur;
    }
    return EOF;
}
/* Copy "test.txt" to "temp.txt", replacing each block comment by a space
 * (or by a newline when the comment spans several lines), and report an
 * unterminated comment with its line numbers on stderr.
 * Returns 1 if a file cannot be opened, 0 otherwise. */
int main(int c, char **arr) {
    FILE *fp, *np;
    int ch;
    bool String = 0;    /* toggled on every quote character seen */
    const char *filename = "test.txt";
    int lineno = 1;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("cannot open input file %s\n", filename);
        return 1;
    }
    np = fopen("temp.txt", "w");
    if (np == NULL) {
        printf("cannot open output file %s\n", "temp.txt");
        fclose(fp);
        return 1;
    }

    while ((ch = getc(fp)) != EOF) {
        if (ch == '\n')
            lineno++;
        if (!String) {
            if (ch == '/') {
                ch = getc(fp);
                if (ch == '\n')
                    lineno++;
                if (ch == '*') {
                    int startline = lineno;
                    ch = m_cmnt(fp, &lineno);
                    if (ch == EOF) {
                        /* lineno is a plain int: print it directly */
                        fprintf(stderr, "%s:%d: error: unterminated comment started on line %d\n",
                                filename, lineno, startline);
                        break;
                    }
                    putc(ch, np);
                } else {
                    putc('/', np);
                    /* '/' was the last byte: do not write EOF as a character */
                    if (ch == EOF)
                        break;
                    putc(ch, np);
                }
            } else {
                putc(ch, np);
            }
        } else {
            putc(ch, np);
        }
        if (ch == '\"' || ch == '\'')
            String = !String;
    }
    fclose(fp);
    fclose(np);
    /* NOTE(review): this deletes the file named by the first argument;
     * kept from the original, but only attempted when an argument exists */
    if (c > 1)
        remove(arr[1]);
    //rename("temp.txt", arr[1]);
    return 0;
}
For illustration, here is a more complete program that handles all special cases for character and string constants and escaped newlines:
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/* Read the next byte from the C source file, handling escaped newlines:
 * a backslash immediately followed by a newline is skipped entirely.
 * *lineno_p is incremented for every newline consumed (escaped or not).
 * Returns the byte read, or EOF at end of input. */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);

        if (c != '\\') {
            if (c == '\n')
                *lineno_p += 1;
            return c;
        }

        int next = getc(fp);
        if (next != '\n') {
            /* ordinary backslash: give the following byte back */
            ungetc(next, fp);
            return '\\';
        }
        /* backslash-newline: count the line and keep reading */
        *lineno_p += 1;
    }
}
/* Strip C comments.
 * usage: prog [INPUT [OUTPUT]]  (defaults: stdin, stdout)
 * A // comment is replaced by a newline; a block comment is replaced by a
 * single space, or by a newline when it spans several lines. String and
 * character constants are copied verbatim so that comment markers inside
 * them are preserved. Diagnostics go to stderr.
 * Returns 1 if a file cannot be opened, 0 otherwise. */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            if (fp != stdin)
                fclose(fp);
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;

        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0, replacement = ' ';
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    if (ch == '\n')
                        replacement = '\n';
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                /* replace comment with a space, or a newline if it spanned lines */
                putc(replacement, ft);
                continue;
            }
            putc('/', ft);
            /* '/' was the last byte of the input: stop here, otherwise the
             * code below would write EOF converted to a byte (0xFF) */
            if (ch == EOF)
                break;
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte so \" or \' does not end the constant */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
/* Write a single newline character to the output stream `np`. */
void handle(FILE *np)
{
    fputc('\n', np);
}
/* Skip the body of a C multi-line comment read from `fp`.
 * Returns the replacement character for the comment (' ' by default, '\n'
 * once the two-character sequence backslash + 'n' has been seen inside it),
 * or EOF if the input ends before the comment is closed. */
int m_cmnt(FILE *fp, int *lineno_p) {
/* NOTE(review): np is not used in this function */
FILE *np = stdout;
int prev, ch, replacement = ' ';
for (prev = 0; (ch = getc(fp)) != EOF; prev = ch) {
/* matches a literal backslash followed by the letter n, not a newline byte */
if (prev == '\\' && ch == 'n') {
replacement = '\n';
++*lineno_p;
}
if (prev == '*' && ch == '/')
return replacement;
}
return EOF;
}
/* Copy argv[1] to argv[2] with block comments replaced by the character
 * returned from m_cmnt(), and with the two-character sequence backslash + 'n'
 * (outside literals) replaced by a newline.
 * Exits with EXIT_FAILURE if a file cannot be opened or a comment is not
 * terminated. */
int main(int argc, char *argv[]) {
FILE *fp = stdin, *np = stdout;
int ch,prev;
/* toggled on every quote character seen, to track "inside a literal" */
bool String = 0;
const char *filename = "<stdin>";
int lineno = 1;
/* NOTE(review): these two calls run before argc is checked; the first one
 * tries to open a file literally named "<stdin>", and both results are
 * overwritten below without being closed when argc > 2. */
fp = fopen(filename, "r");
np = fopen(argv[2], "w");
if (argc > 1) {
if ((fp = fopen(filename = argv[1], "r")) == NULL) {
fprintf(stderr, "Cannot open input file %s: \n",
filename);
exit(EXIT_FAILURE);
}
}
if (argc > 2) {
if ((np = fopen(argv[2], "w")) == NULL) {
fprintf(stderr, "Cannot open output file %s: \n",
argv[2]);
exit(EXIT_FAILURE);
}
}
while ((ch = getc(fp)) != EOF) {
if (ch == '\n')
lineno++;
/* file pointer currently not inside a string */
if (!String) {
if (ch == '/') {
/* look at the byte after '/' to decide whether a comment starts */
ch = getc(fp);
if (ch == '\n')
lineno++;
if (ch == '*') {
int startline = lineno;
ch = m_cmnt(fp, &lineno);
if (ch == EOF) {
fprintf(stderr, "%s:%d: error: unterminated comment started on line %d\n",
filename, lineno, startline);
exit(EXIT_FAILURE);
break;
}
putc(ch, np);
} else {
/* not a comment: both bytes are written and scanning resumes after
 * them, so a comment opening right after this second byte's '/'
 * is not detected */
putc('/', np);
putc(ch, np);
}
}
else if ( ch=='\\')/*to handle newline character*/
{
prev=ch ;
ch= getc(fp) ;
switch(ch)
{
case 'n' :
handle(np);
break ;
/* with the default case commented out, any other escape sequence
 * (backslash and following byte) is dropped from the output */
/*default :
putc(prev , np) ;
putc(ch , np) ;
break ;*/
}
}
else {
putc(ch, np);
}
} else {
/* inside a literal: bytes are copied unchanged; a backslash is not
 * treated specially, so \" toggles the state below */
putc(ch, np);
}
if (ch == '"' || ch == '\'')
String = !String;
}
fclose(fp);
fclose(np);
//remove(arr[1]);
//rename("temp.txt", arr[1]);
return EXIT_SUCCESS;
}
I have been working on this project for almost more than a week now. I have asked many questions on this site to help me get the desired result.The basics of this program is to remove multiline comments from source file and write the rest to some output file. It also need to to ignore any thing that is inside a string literal or character literal(like escaped characters). Now I have come to finalize it but I still need to achieve this two outputs shown below
INPUT1 = //*SOMECOMMENT*/
OUTPUT1 = /
INPUT2 = "this \"test"/*test*/
OUTPUT2 = "this \"test"
The current(erroneous) output is shown below
INPUT1 = //*SOMECOMMENT*/
OUTPUT1 = //*SOMECOMMENT*/ This is wrong.
INPUT2 = "this \"test"/*test*/
OUTPUT2 = "this \"test"/*test*/ This is also wrong.
The program doesn't work for the case where a comment comes right after a forward slash (/). The second failure is that it doesn't handle escape characters inside a string or character literal. I need a fix for these two problems, please.
If your problem is that you want to read an input stream of characters, divide that stream into tokens, and then emit only a subset of those tokens, I think Lex is exactly the tool you're looking for.
If I understand your comment correctly, the file you're trying to read in and transform is itself C code. So you will need to build up a Lex definition of the C language rules.
A quick search turned up this Lex specification of the ANSI C grammar. I cannot vouch for its accuracy or speak to its licensing. At first glance it seems to only support C89. But it is probably enough to point you in the right direction.
Right now this code doesn't remove inline comments, how do I change it so it also removes inline comments?
FILE *output;
output = fopen("preprocess_output.c", "w");
while (fgets(line, LINE_LENGTH, file) != NULL)
{
for (int i = 0; i < strlen(line); i++)
{
if (line[i] == '/' && line[i + 1] == '/')
{
comment_lines++;
}
else
{
fprintf(output, line);
}
if (line[i] != '\n' && line[i] != '\t')
{
non_blank++;
break;
}
}
}
Here is a small program that strips C comments in almost all cases.
/* strip C comments by chqrlie */
#include <errno.h>
#include <stdio.h>
#include <string.h>
/* Read the next byte from the C source file, handling escaped newlines:
 * a backslash immediately followed by a newline is skipped entirely.
 * *lineno_p is incremented for every newline consumed (escaped or not).
 * Returns the byte read, or EOF at end of input. */
int getcpp(FILE *fp, int *lineno_p) {
    for (;;) {
        int c = getc(fp);

        if (c != '\\') {
            if (c == '\n')
                ++*lineno_p;
            return c;
        }

        int next = getc(fp);
        if (next != '\n') {
            /* ordinary backslash: give the following byte back */
            ungetc(next, fp);
            return '\\';
        }
        /* backslash-newline: count the line and keep reading */
        ++*lineno_p;
    }
}
/* Strip C comments.
 * usage: prog [INPUT [OUTPUT]]  (defaults: stdin, stdout)
 * A // comment is replaced by a newline and a block comment by a single
 * space; string and character constants are copied verbatim so that comment
 * markers inside them are preserved. Diagnostics go to stderr.
 * Returns 1 if a file cannot be opened, 0 otherwise. */
int main(int argc, char *argv[]) {
    FILE *fp = stdin, *ft = stdout;
    const char *filename = "<stdin>";
    int ch, lineno;

    if (argc > 1) {
        if ((fp = fopen(filename = argv[1], "r")) == NULL) {
            fprintf(stderr, "Cannot open input file %s: %s\n",
                    filename, strerror(errno));
            return 1;
        }
    }
    if (argc > 2) {
        if ((ft = fopen(argv[2], "w")) == NULL) {
            fprintf(stderr, "Cannot open output file %s: %s\n",
                    argv[2], strerror(errno));
            if (fp != stdin)
                fclose(fp);
            return 1;
        }
    }
    lineno = 1;
    while ((ch = getcpp(fp, &lineno)) != EOF) {
        int startline = lineno;

        if (ch == '/') {
            if ((ch = getcpp(fp, &lineno)) == '/') {
                /* single-line comment */
                while ((ch = getcpp(fp, &lineno)) != EOF && ch != '\n')
                    continue;
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated single line comment\n",
                            filename, startline);
                    break;
                }
                putc('\n', ft);  /* replace comment with newline */
                continue;
            }
            if (ch == '*') {
                /* multi-line comment */
                int lastc = 0;
                while ((ch = getcpp(fp, &lineno)) != EOF) {
                    if (ch == '/' && lastc == '*') {
                        break;
                    }
                    lastc = ch;
                }
                if (ch == EOF) {
                    fprintf(stderr, "%s:%d: unterminated comment\n",
                            filename, startline);
                    break;
                }
                putc(' ', ft);  /* replace comment with single space */
                continue;
            }
            putc('/', ft);
            /* '/' was the last byte of the input: stop here, otherwise the
             * code below would write EOF converted to a byte (0xFF) */
            if (ch == EOF)
                break;
            /* keep parsing to handle n/"a//"[i] */
        }
        if (ch == '\'' || ch == '"') {
            int sep = ch;
            const char *const_type = (ch == '"') ? "string" : "character";

            putc(sep, ft);
            while ((ch = getcpp(fp, &lineno)) != EOF) {
                putc(ch, ft);
                if (ch == sep)
                    break;
                if (ch == '\\') {
                    /* copy the escaped byte so \" or \' does not end the constant */
                    if ((ch = getcpp(fp, &lineno)) == EOF)
                        break;
                    putc(ch, ft);
                }
                if (ch == '\n') {
                    fprintf(stderr, "%s:%d: unescaped newline in %s constant\n",
                            filename, lineno - 1, const_type);
                    /* This is a syntax error but keep going as if constant was terminated */
                    break;
                }
            }
            if (ch == EOF) {
                fprintf(stderr, "%s:%d: unterminated %s constant\n",
                        filename, startline, const_type);
                break;
            }
            continue;
        }
        putc(ch, ft);
    }
    if (fp != stdin)
        fclose(fp);
    if (ft != stdout)
        fclose(ft);
    return 0;
}
Since you get a full answer for free, try and learn how the above code handles strings and escaped newlines. There are still some corner cases that are not supported, can you find them?
one such corner case is the code does not parse trigraphs, an obsolescent feature that may be used to hide \ characters.
in the following solution, there is a single pass over the line. If a comment was found (//), we terminate and print it. supporting (/* */) requires more work.
/* Copy each line to `output`, cutting it at the first "//".
 * The line terminator is kept so that stripping a trailing comment does not
 * join the line with the next one. "//" inside string literals is not
 * recognised and is cut as well. */
while (fgets(line, LINE_LENGTH, file) != NULL)
{
    size_t len = strlen(line);
    size_t i;

    for (i = 0; i < len; i++)
    {
        /* line[i + 1] is at worst the terminating '\0', so this is in bounds */
        if (line[i] == '/' && line[i + 1] == '/')
        {
            if (line[len - 1] == '\n')
            {
                /* i + 1 < len here, so there is room for "\n\0" */
                line[i] = '\n';
                line[i + 1] = '\0';
            }
            else
            {
                line[i] = '\0';
            }
            break;
        }
    }
    fprintf(output, "%s", line);
}
Note two points in addition to the logic:
when printing using printf, always use a format string. If the line contains % it might do unexpected things.
do not put strlen in the condition of a loop. It generates a lot of unnecessary loops to calculate the length.