Need help to extract comments from c file - c

i just need help of how to extract comments from input text file and paste them in output file using C language in Unix command line. I don't need the code. Just give me favor of guiding me please. Here is what I want.
Input:
If the input file input_0.txt contains
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}
Output:
Then the execution of the program would be as follows.
$ ./Comments < input_0.txt
This is a single-line C comment
This is a nicely formatted
multi-line comment.
This is a C++ comment.
Here is my code which i modified from the Respected #David C. Rankin's code.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
int main (int argc, char **argv) {
/* Variables for removing comments*/
int ch, i = 0, flag = 0, prev = '\0';
FILE *fp1, *fp2;
char fname[MAX], temp[] = "temp.txt";
/* Variables for removing comments*/
int inmulti = 0,
insingle = 0,
longline = 0;
char buf[MAXC] = "";
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
/* validate file open for reading */
if (!fp) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
/* open the temporary file in write mode */
fp2 = fopen(temp, "w");
/* error handling */
if (!fp2) {
printf("Unable to open temporary file!!\n");
return 0;
}
while (fgets (buf, MAXC, fp)) { /* read upto MAXC into buf */
char *p = buf; /* pointer to buf */
size_t len = strlen (buf); /* get length */
if (longline) { /* is this 2nd read of long line? */
if (insingle) { /* are we in a single comment? */
printf ("%s", buf); /* print it, get next buf */
continue;
}
else /* otherwise, reset insingle flag */
insingle = 0;
}
if (inmulti) { /* are we in a multi-line comment? */
/* (note: you need to check if quoted here) */
if (strstr (buf, "*/")) { /* does buf contain ending? */
inmulti = 0; /* reset inmulti comment */
}
printf ("%s", buf); /* print the line */
continue; /* (note: end can be before end of line) */
}
if (len && buf[len-1] != '\n') /* check if end of line read */
longline = 1; /* if not, set longline */
else
longline = 0; /* or, reset it */
while (*p && *p != '/') p++; /* find start (or end) of comment */
if (!*p) continue; /* none found, get next buf */
if (*(p + 1) == '/') { /* start of single line comment */
/* note: must make sure not part of path here */
insingle = 1; /* set single-line comment flag */
printf ("%s", buf); /* print line */
} /* note: can print from p for comment only */
else if (*(p + 1) == '*') { /* start of multiline comment */
if (!strstr (p + 2, "*/")) { /* check for ending */
inmulti = 1; /* set multiline flag */
}
printf ("%s", buf); /* print the line */
} /* note: can print from p for comment only */
else if (p > buf && *(p - 1) == '*') { /* was / end of multi? */
printf ("%s", buf); /* end of multi line comment */
inmulti = 0;
}
}
rewind(fp);
/* removes comments from the given input file */
prev = fgetc(fp);
while ((ch = fgetc(fp)) != EOF) {
/* flag is 1 - double slash comment */
if (flag == 1) {
/* skip the contents until you detect \n */
if (ch == '\n') {
flag = 0;
prev = fgetc(fp);
}
continue;
}
/* flag is 2 - slash arsterix comment */
if (flag == 2) {
/* skip the contents until you detect asterix slash */
if (ch == '/' && prev == '*') {
flag = 0;
prev = fgetc(fp);
}
continue;
}
/* checking for double slash comment */
if (ch == '/' && prev == '/') {
flag = 1;
} else if (prev == '/' && ch == '*') {
/* slash asterix comment */
flag = 2;
} else {
/* contents outside of comments */
fputc(prev, fp2);
}
prev = ch;
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
/* closing the input file */
fclose(fp);
fclose(fp2);
return 0;
}

Note, to do this correctly, there are many more conditions that need to be checked for (such as "//", "/*" or "*/" appearing as part of a path, or within a string). Making use of regular expressions is also another way to approach this.
If I understand you correctly, and you a looking to parse comment lines of a source file using basic C, then the following is a quick example of reading all lines in a file (which is provided as the 1st argument, or on stdin) and looking for single-line or multi-line comment delimiters.
This is not intended to be complete and cover all corner-cases or cases where the delimiters appear within literals, defines, etc.., but some care has been taken to note where additional code should be added to address those concerns.
The basic approach is to read a line in MAXC (1024 byte chunks) and keeping track of 3 flags. longline meaning the line exceeds MAXC chars and you have read the 2nd (or 3rd, or 4th...) buffer full. inmulti tracking whether you are in a multi-line comment. Finally insingle where you are within a single-line comment that may exceed MAXC chars. The read loops checks and acts based upon the state of the flags, while looking for the end of a multi-line comment (if within one). The code also checks for multi-line comment start and end -- all within one line.
Given those qualifications, you could start with something like:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
int main (int argc, char **argv) {
int inmulti = 0,
insingle = 0,
longline = 0;
char buf[MAXC] = "";
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read upto MAXC into buf */
char *p = buf; /* pointer to buf */
size_t len = strlen (buf); /* get length */
if (longline) { /* is this 2nd read of long line? */
if (insingle) { /* are we in a single comment? */
printf ("%s", buf); /* print it, get next buf */
continue;
}
else /* otherwise, reset insingle flag */
insingle = 0;
}
if (inmulti) { /* are we in a multi-line comment? */
/* (note: you need to check if quoted here) */
if (strstr (buf, "*/")) { /* does buf contain ending? */
inmulti = 0; /* reset inmulti comment */
}
printf ("%s", buf); /* print the line */
continue; /* (note: end can be before end of line) */
}
if (len && buf[len-1] != '\n') /* check if end of line read */
longline = 1; /* if not, set longline */
else
longline = 0; /* or, reset it */
while (*p && *p != '/') p++; /* find start (or end) of comment */
if (!*p) continue; /* none found, get next buf */
if (*(p + 1) == '/') { /* start of single line comment */
/* note: must make sure not part of path here */
insingle = 1; /* set single-line comment flag */
printf ("%s", buf); /* print line */
} /* note: can print from p for comment only */
else if (*(p + 1) == '*') { /* start of multiline comment */
if (!strstr (p + 2, "*/")) { /* check for ending */
inmulti = 1; /* set multiline flag */
}
printf ("%s", buf); /* print the line */
} /* note: can print from p for comment only */
else if (p > buf && *(p - 1) == '*') { /* was / end of multi? */
printf ("%s", buf); /* end of multi line comment */
inmulti = 0;
}
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
Example Input File
$ cat dat/comments.txt
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}
Example Use/Output
$ ./bin/comments <dat/comments.txt
/* This is a single-line C comment */
/******
* This is a nicely formatted
* multi-line comment.
******/
// This is a C++ comment.
note: the worth in an exercise such as this is in the learning value stepping though a long string identifying certain individual characters, as well as in handling various flags and program states as you loop though a file.
Reading Character-by-Character
To switch from a line-oriented approach to a character-oriented approach (and adding a couple of states noted in the comments from chux), you will read the first character (save it), and then read the remaining characters in the file. This provides a way of comparing the previous to current to determine if you are within or without a single-line comment, a multi-line comment or single or double quotes.
Again, this isn't intended to catch every corner case, but the output was updated to not print the opening or closing comment delimiters. (you will need to tweak the printing of * within multi-line comments and quotes within comments to your taste).
Changing from reading with fgets to fgetc, you could do something similar to the following:
#include <stdio.h>
int main (int argc, char **argv) {
int inmulti = 0, /* in multi-line comment flag */
insingle = 0, /* in single-line comment flag */
insquo = 0, /* within single-quotes */
indquo = 0, /* within double-quotes */
c, prev = 0;
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if ((prev = fgetc(fp)) == EOF) /* read 1st char */
return 1;
while ((c = fgetc(fp)) != EOF) { /* read remaining */
switch (c) { /* switch on c */
case '/':
if (prev == '/' && !(insquo | indquo))
insingle = 1;
if (prev == '*' && !(insquo | indquo))
inmulti = 0;
break;
case '*':
if (prev == '/' && !(insquo | indquo))
inmulti = 1;
break;
case '\n':
insingle = 0;
if (insingle || inmulti)
putchar (c);
break;
case '\'':
insquo = insquo ? 0 : 1;
break;
case '"':
indquo = indquo ? 0 : 1;
break;
default:
if ((insingle || inmulti) && !(insquo | indquo))
putchar (c);
break;
}
prev = c;
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
putchar ('\n'); /* tidy up with newline */
return 0;
}
Example Use/Output
$ ./bin/commentsfgetc <dat/comments.txt
This is a single-line C comment
This is a nicely formatted
multi-line comment.
This is a C++ comment.
Look things over and let me know if you have questions over how characters are being identified or the code controlled to locate the start and end of the comment blocks.

You can use this shell script to do that, and save to a file comments.txt
cat generic.c | awk '/\/\// {print $0}; /\/\*/ {aux=1}; {if(aux) print $0}; /\*\// {aux=0}' > comments.txt
Good lucky

Related

Going Through multiple columns of a csv file in C

I want to write a program, that reads a very large csv file. I want the file to read the columns by name and then print the entirety of the column. However it only prints out one of the columns in the datalist. So it only prints out the unix timestamp columns out of the entirety of the program. I want the code to be able to print out the other columns as well Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
csv file:
Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
1605139200.0,2020-11-12,BTCUSD,15710.87,15731.73,15705.58,15710.01,1.655,26014.29
1605052800.0,2020-11-11,BTCUSD,15318,16000,15293.42,15710.87,1727.17,27111049.25
1604966400.0,2020-11-10,BTCUSD,15348.2,15479.49,15100,15318,1600.04,24521694.72
1604880000.0,2020-11-09,BTCUSD,15484.55,15850,14818,15348.2,2440.85,37356362.78
1604793600.0,2020-11-08,BTCUSD,14845.5,15672.1,14715.98,15484.55,987.72,15035324.13
Current code:
#include<stdio.h>
#include<stdlib.h>
void main()
{
char buffer[1001]; //get line
float timestampfile;
FILE *fp;
int i=1; //line
fp = fopen("filename.csv", "r"); //used to read csv
if(!fp)
{
printf("file not found"); //file not found
exit(0);
}
fgets(buffer,1000, fp); //read line
printf("Expected output print the first column:\n");
while(feof(fp) == 0)
{
sscanf(buffer,"%f",&timestampfile); //read data line
printf("%d: %f\n",i,timestampfile); //used to print data
i++;
fgets(buffer, 1000, fp);
}
printf("end of the column");
fclose(fp);
}
Current output:
1: 1605139200.000000
2: 1605052800.000000
3: 1604966400.000000
4: 1604880000.000000
5: 1604793600.000000
end of the column
You have started out in the right direction, but you have stumbled a bit in handling separating the comma separated values. The standard C library provides all you need to handle separating the values.
Simple Implementation Using strtok()
The easiest implementation would be to take the filename to read and the index of column to extract as the first two arguments to your program. Then you could simply discard the heading row and output the requested value for the column index. That could be done with a simple loop that keeps track of the token number while calling strtok(). Recall on the first call to strtok() the variable name for the string is passed as the first parameter, ever successive call passes NULL as the first argument until no more tokens are found.
A short example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
#define DELIM ",\n"
int main (int argc, char **argv) {
if (argc < 3) { /* validate filename and column given as arguments */
fprintf (stderr, "usage: %s filename column\n", argv[0]);
return 1;
}
char buf[MAXC]; /* buffer to hold line */
size_t ndx = strtoul (argv[2], NULL, 0); /* column index to retrieve */
FILE *fp = fopen (argv[1], "r"); /* file pointer */
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!fgets (buf, MAXC, fp)) { /* read / discard headings row */
fputs ("error: empty file.\n", stderr);
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read / validate each line */
char *p = buf;
size_t i = 0;
/* loop until the ndx token found */
for (p = strtok(p, DELIM); p && i < ndx; p = strtok (NULL, DELIM))
i++;
if (i == ndx && p) /* validate token found */
puts (p);
else { /* handle error */
fputs ("error: invalid index\n", stderr);
break;
}
}
}
(note: strtok() considers multiple delimiters as a single delimiter. It cannot be used when empty fields are a possibility such as field1,field2,,field4,.... strsep() was suggested as a replacement for strtok() and it does handle empty-fields, but has shortcomings of its own.)
Example Use/Output
first column (index 0):
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 0
1605139200.0
1605052800.0
1604966400.0
1604880000.0
1604793600.0
second column (index 1)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 1
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
thrid column (index 2)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 2
BTCUSD
BTCUSD
BTCUSD
BTCUSD
BTCUSD
forth column (index 3)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 3
15710.87
15318
15348.2
15484.55
14845.5
request out of range:
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 9
error: invalid index
More Involved Example Displaying Headings as Menu
If you wanted to provide a short interface for the user to choose which column to output, you could count the columns available. You can determine the number of commas present (and adding one more provides the number of columns). You can then save the headings to allow the user to select which column to output by allocating column number of pointers and then by allocating storage for each heading and copying the heading to the storage. You can then display the headings as a menu for the user to select from.
After determining which column to print, you simply read each line into your buffer, and then tokenize the line with either strtok() or strcspn() (the downside to strtok() is that it modifies the buffer, so if you need to preserve it, make a copy). strcspn() returns the length of the token, so it provides the advantage of not modifying the original and providing the number of characters in the token. Then you can output the column value and repeat until you run out of lines.
An example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
int main (int argc, char **argv) {
char buf[MAXC], *p = buf, **headings = NULL;
size_t cols = 1, ndx = 0, nchr;
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!fgets (buf, MAXC, fp)) { /* read / validate headings row */
fputs ("error: empty file.\n", stderr);
return 1;
}
while (*p && (p = strchr (p, ','))) { /* loop counting ',' */
cols++;
p++;
}
p = buf; /* reset p to start of buf */
/* allocate cols pointers for headings */
if (!(headings = malloc (cols * sizeof *headings))) {
perror ("malloc-heading pointers");
return 1;
}
/* loop separating headings, allocate/assign storage for each, copy to storage */
while (*p && *p != '\n' && (nchr = strcspn (p, ",\n"))) {
if (!(headings[ndx] = malloc (nchr + 1))) { /* allocate/validate */
perror ("malloc headings[ndx]");
return 1;
}
memcpy (headings[ndx], p, nchr); /* copy to storage */
headings[ndx++][nchr] = 0; /* nul-terminate */
p += nchr+1; /* advance past ',' */
}
if (ndx != cols) { /* validate ndx equals cols */
fputs ("error: mismatched cols & ndx\n", stderr);
return 1;
}
puts ("\nAvailable Columns:"); /* display available columns */
for (size_t i = 0; i < cols; i++)
printf (" %2zu) %s\n", i, headings[i]);
while (ndx >= cols) { /* get / validate selection */
fputs ("\nSelection: ", stdout);
if (!fgets (buf, MAXC, stdin)) { /* read input (same buffer) */
puts ("(user canceled input)");
return 0;
}
if (sscanf (buf, "%zu", &ndx) != 1 || ndx >= cols) /* convert/validate */
fputs (" error: invalid index.\n", stderr);
}
printf ("\n%s values:\n", headings[ndx]); /* display column name */
while (fgets (buf, MAXC, fp)) { /* loop displaying column */
char column[MAXC];
p = buf;
/* skip forward ndx ',' */
for (size_t col = 0; col < ndx && (p = strchr (p, ',')); col++, p++) {}
/* read column value into column */
if ((nchr = strcspn (p, ",\n"))) {
memcpy (column, p, nchr); /* copy */
column[nchr] = 0; /* nul-terminate */
puts (column); /* output */
}
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
for (size_t i = 0; i < cols; i++) /* free all allocated memory */
free (headings[i]);
free (headings);
}
Example Use/Output
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 1
Date values:
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
Or the open values:
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 3
Open values:
15710.87
15318
15348.2
15484.55
14845.5
Column out of range canceling input with Ctrl + d (Ctrl + z on windows):
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 9
error: invalid index.
Selection: (user canceled input)
Both approaches accomplish the same thing, it all depends on your program needs. Look things over and let me know if you have further questions.
In order to extract more than one field by name, you must get the names of the fields to extract, for example as command line arguments, determine the corresponding columns, and for each line of the CSV file, output the requested columns.
Below is a simple program that extracts columns from a CSV file and produces another CSV file. It does not use strtok() nor strchr() but analyses the line one character at a time to find the starting and ending offset of the columns and acts accordingly. The source file is passed as redirected input and the output can be redirected to a different CSV file.
Here is the code:
#include <stdio.h>
#include <string.h>
int find_header(const char *line, const char *name) {
int len = strlen(name);
int i, n, s;
for (i = n = s = 0;; i++) {
if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
if (len == i - s && !memcmp(line + s, name, len))
return n;
if (line[i] != ',')
return -1;
s = i + 1;
n++;
}
}
}
int main(int argc, char *argv[]) {
char buffer[1002];
int field[argc];
char *name[argc];
int i, n;
if (argc < 2) {
printf("usage: csvcut FIELD1 [FIELD2 ...] < CSVFILE\n");
return 2;
}
// read the input header line
if (!fgets(buffer, sizeof buffer, stdin)) {
fprintf(stderr, "missing header line\n");
return 1;
}
// determine which columns to extract
for (n = 0, i = 1; i < argc; i++) {
int f = find_header(buffer, argv[i]);
if (f < 0) {
fprintf(stderr, "field not found: %s\n", argv[i]);
} else {
name[n] = argv[i];
field[n] = f;
n++;
}
}
// output new header line
for (i = 0; i < n; i++) {
if (i > 0)
putchar(',');
printf("%s", name[i]);
}
putchar('\n');
// parse the records, output the selected fields
while (fgets(buffer, sizeof buffer, stdin)) {
for (i = 0; i < n; i++) {
int j, s, f, start, length;
if (i > 0)
putchar(',');
// find field boundaries
for (j = s = f = start = length = 0;; j++) {
if (buffer[j] == ',' || buffer[j] == '\n' || buffer[j] == '\0') {
if (f == field[i]) {
start = s;
length = j - s;
break;
}
if (buffer[j] != ',')
break;
s = j + 1;
f++;
}
}
printf("%.*s", length, buffer + start);
}
putchar('\n');
}
return 0;
}
Sample run:
./csvcut Date Close < sample.csv
Date,Close 2020-11-12,15710.01
2020-11-11,15710.87
2020-11-10,15318
2020-11-09,15348.2
2020-11-08,15484.55
Note that fields cannot contain embedded commas. The program could be extended to handle quoted contents to support these.

How to skip a comment in c programming with using fopen

I want to ignore/skip the comments in a text file when I use fgets.
The problem is that I only can skip a comment if the first character in a line starts is #. Comments starts with # in my text file. But there are some # in my file.txt that are not the first character of a line, like so;
#Paths
A B #Path between A and B.
D C #Path between C and D.
A is my first node, B is my second node and when # comes I want to ignore the rest of text until the next line. My new node should be D and C etc. I can only use "r" in fopen function.
I have tried fgets but it reads line by line and fgetc doesn't help either.
bool ignore_comments(const char *s)
{
int i = 0;
while (s[i] && isspace(s[i])) i++;
return (i >= 0 && s[i] == '#');
}
FILE *file;
char ch[BUFSIZE];
file = fopen("e.txt", "r");
if (file == NULL) {
printf("Error\n");
fprintf(stderr, "ERROR: No file input\n");
exit(EXIT_FAILURE);
}
while(fgets(ch, BUFSIZE, file) != NULL)
{
if (line_is_comment(ch)) {
// Ignore comment lines.
continue;
printf("%c",*ch);
}
fscanf(file, "%40[0-9a-zA-Z]s", ch);
....
}
the following proposed code:
performs the desired functionality
cleanly compiles
properly checks for errors
this answer uses a state machine, based on: 'InComment'
and now, the proposed code:
#include <stdio.h>
#include <stdlib.h>
int main( void )
{
int InComment = 0;
FILE *fp = fopen( "file.txt", "r" );
if( !fp )
{
perror( "fopen to read -file.txt- failed" );
exit( EXIT_FAILURE );
}
int ch;
while( (ch = fgetc(fp)) != EOF )
{
if( ch == '#' )
{
InComment = 1;
}
else if( ch == '\n' )
{
InComment = 0;
fputc( ch, stdout );
}
else if( !InComment )
{
fputc( ch, stdout );
}
}
fclose( fp );
}
Also method names are different, but am I right with this version ?
Ignore my dirty method line_is_comment - from first version unless you want to play with ;-)
Extended test input:
#Paths
A B #Path between A and B.
D C #Path between C and D.
E F
G H
Output:
rest of line read
AB rest of line read
DC rest of line read
EF rest of line read
GH rest of line read
#include <stdio.h>
bool line_is_comment(const char *s)
{
char *commentPos = const_cast<char*>(strchr(s, '#'));
if(commentPos != NULL) {
*commentPos = 0; // cut-off chars after comment
//return true; // or false then to accept the line
return commentPos == s;
}
return false;
}
#define BUFSIZE 50
int main()
{
FILE *file;
char ch[BUFSIZE];
file = fopen("e.txt", "r");
if (file == NULL) {
printf("Error\n");
fprintf(stderr, "ERROR: No file input\n");
exit(EXIT_FAILURE);
}
int x;
while(!feof(file)) {
x = fscanf(file, "%40[0-9a-zA-Z]s", ch);
if(x == 0) {
ch[0] = fgetc(file);
if(ch[0] == '#' || ch[0] == '\n') {
if(ch[0] != '\n') fgets(ch, BUFSIZE, file);
printf(" rest of line read\n");
}
} else if(x<0) break;
else {
printf("%c",*ch); // continue with ... undisclosed part here
}
}
return 0;
}
You can also make use of strcspn to trim all comments (and if not present, trim the line-endings from your buffer) in a single simple call. Where you would normally trim the line-ending from the buffer read by fgets() with:
ch[strcspn (ch, "\r\n")] = 0; /* trim line-ending */
You can simply add the "#" character to your reject list and nul-terminate there if a comment is present. That would reduce the complete task of removing comments beginning with '#' and outputting the newly formatted line to:
while (fgets (ch, BUFSIZE, fp)) { /* read every line */
ch[strcspn (ch, "#\r\n")] = 0; /* trim comment or line-ending */
puts (ch); /* output line w/o comment */
}
A short example taking the file to read as the first argument to the program (or reading from stdin by default if no argument is given), you could do:
#include <stdio.h>
#include <string.h>
#define BUFSIZE 1024 /* if you need a constant, #define one (or more) */
int main (int argc, char **argv) {
char ch[BUFSIZE];
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (ch, BUFSIZE, fp)) { /* read every line */
ch[strcspn (ch, "#\r\n")] = 0; /* trim comment or line-ending */
puts (ch); /* output line w/o comment */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
Example Input File
Borrowing Tom's example file :)
$ cat dat/comments_file.txt
#Paths
A B #Path between A and B.
D C #Path between C and D.
E F
G H
Example Use/Output
$ ./bin/comments_remove <dat/comments_file.txt
A B
D C
E F
G H
Look things over and let me know if you have further questions.

How to sum up numbers from each lines in file in c?

I need to sum up the numbers from each line in the file like this e.g.:
1 2 3
10 -1 -3
and the result I should write to another file in each line likes this:
6
6
And I have the problem when in each line after the last number in reading file have more spaces, for example, maybe I use the '_' to show this problem:
When my function works:
10_11_12 '\n'
1_2_3 '\n'
and when my function doesn't work:
10_11_12_ _ _ '\n'
1_2_3 '\n'
I think I know where is the problem, but I have no idea how to fix it.
It's my function here:
int num=0;
char s;
while(fscanf(file, "%d", &num)==1){
fscanf(file, "%c", &s);
sum+=num;
if(s=='\n'){
fprintf(res_file, "%d\n", sum);
sum=0;
}
}
The problem is that fscanf is expecting a pointer to a char. Within your function, you are using a regular char, s.
char s;
You can fix your issue by making s a pointer. First, Allocate memory.
char *s = malloc(sizeof(char) + 1);
Now we can properly scan into the variable, s, and then check for the newline character. The only difference here is now we check for the newline by dereferencing s.
if (*s == '\n')
Don't forget to clean up the memory leak with free()!
free(s);
I was able to get the desired output using the code below.
#include <stdio.h>
#include <stdlib.h>
int processInputFile(char *filename)
{
FILE *ifp;
int buffer = 0;
char *newline = malloc(sizeof(char) + 1);
int sum = 0;
if ((ifp = fopen(filename, "r")) == NULL)
{
fprintf(stderr, "Failed to open \"%s \" in processInputFile.\n", filename);
return -1;
}
while(fscanf(ifp, "%d", &buffer) == 1)
{
fscanf(ifp, "%c", newline);
sum += buffer;
if (*newline == '\n')
{
printf("%d\n", sum);
sum = 0;
}
}
free (newline);
fclose(ifp);
}
int main(int argc, char **argv)
{
if (argc < 2)
{
printf("Proper syntax: ./a.out <n>\n");
return -1;
}
processInputFile(argv[1]);
return 0;
}
Any kind of line-by-line processing in C is easier done by reading the line first, and then processing it. fgets(3) handles end-of-line for you; then you just need to scan what it read. Plus, in the real world, some lines won't scan: either they'll have errors, or your scan won't be general enough. When that happens, it's awfully handy to write the input to standard error, so you can see what you're looking at.
Here's a complete program that does what you want. It assumes lines are less than 80 bytes long and doesn't protect against invalid input, though.
#include <stdio.h>
#include <err.h>
int main( int argc, char *argv[] ) {
char line[80];
static const char *filename = "sum.dat";
FILE *input;
if( (input = fopen(filename, "r")) == NULL ) {
err(1, "could not open %s", filename);
}
for( int nlines = 0;
fgets(line, sizeof(line), input) != NULL;
nlines++ )
{
double value, sum = 0;
int n;
for( char *p = line; sscanf(p, "%lf%n", &value, &n) > 0; p += n ) {
sum += value;
}
printf( "line %d: sum = %lf\n", nlines, sum );
}
return 0;
}
Reading with a line-oriented input function like fgets() or POSIX getline() ensures that a complete line of input is consumed on each call. (don't skimp on buffer size). strtol was created to convert an unknown number of values per-line into long. You walk-a-pointer down your buffer by utilizing the endptr parameter filled by strtol after a successful conversion to point to the next character after the last digit converted.
This allows a simple method to use a pair of pointers, p your start-pointer and ep your end-pointer to work through an entire line converting values as you go. The basic approach is to call strtol, validate it succeeded, and then set p = ep; to advance to the start of your next conversion. strtol ignores leading whitespace.
Putting it altogether, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
/* (don't skimp on buffer-size) */
int main (int argc, char **argv) {
char buf[MAXC]; /* buffer to hold each line read */
size_t n = 0; /* line-counter */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read each line */
char *p = buf, *ep = p; /* pointer and end-pointer */
int sum = 0; /* variable to hold sum */
if (*buf == '\n') /* ignore empty lines */
continue;
while (*p && *p != '\n') {
errno = 0;
long tmp = strtol (p, &ep, 0); /* convert to temp long */
if (p == ep) { /* validate digits were converted */
fputs ("error: no digits extracted.\n", stderr);
break;
}
else if (errno) { /* validate no under/overflow occurred */
fputs ("error: underflow/overflow occurred.\n", stderr);
break;
}
else if (tmp < INT_MIN || INT_MAX < tmp) { /* validate in range */
fputs ("error: tmp exceeds range of int.\n", stderr);
break;
}
sum += tmp; /* add tmp to sum */
p = ep; /* set p to end-ptr (one past last digit used) */
}
n++; /* advance line counter */
printf ("sum line [%2zu] : %d\n", n, sum); /* output sum */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
(note: the if (*buf == '\n') which tests if the first character in the line is a newline character and simple skips to the next line, no need to worry about converting values in a empty line)
Example Use/Output
Using your data in dat/sumlines.txt produces the expected results.
$ ./bin/sumline dat/sumlines.txt
sum line [ 1] : 6
sum line [ 2] : 6
Let me know if you have further questions.

How to print out a file line by line by using dynamic memory allocation?

how is printing a file line by line done using malloc? Ideally, I want to allocate just enough space to hold a line in the file, print it, free it, and then repeat the process.
Please include code snippets, thank you!
This is what I have so far, but Valgrind is saying I have memory leaks.
int main (int argc, char **argv)
{
// char *line = NULL;
char *line;
FILE *fp = fopen(argv[1], "r");
if (!fp) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
line = malloc(1);
while (readline(fp, line)) {
// printf("%s\n", line);
}
free(line);
if (fp != stdin) fclose (fp);
return 0;
}
char *readline (FILE *fp, char *buffer)
{
char ch;
int place = 0;
size_t nchar = 1;
ch = fgetc(fp);
while (ch != '\n' && ch != EOF)
{
nchar++;
(buffer)[place] = ch;
char *tmp = realloc (buffer, nchar);
if (!tmp) {
fprintf (stderr, "error: realloc failed, "
"returning partial buffer.\n");
(buffer)[place] = 0;
return buffer;
}
buffer = tmp;
ch = fgetc(fp);
place++;
}
(buffer)[place] = '\0'; /* nul-terminate */
if (ch == EOF) {
if (strlen(buffer) > 1) {
printf("%s\n", buffer);
}
// free(buffer);
buffer = NULL;
} else {
if (strlen(buffer) > 1) {
printf("%s\n", buffer);
}
}
return buffer;
}
This is how I would do it:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/******************************************************************************
** Read a line from specified stream into allocated memory.
** (Caller is responsable to free the allocated memory).
**
** Parameters:
** I__fp File stream to read.
** _O_line_A Sets caller's pointer to the allocated memory containing the read line.
** Caller's pointer is set to NULL if a blank line is encountered.
** _O_eof Sets caller's integer value as follows:
** 0 = (FALSE) More lines available to read from file.
** -1 = (TRUE) The end-0f-file was encountered while reading the line.
** Return values:
** 0 Call was successful.
** ENOMEM Could not allocate sufficient memory to read a line.
*/
int ReadLine(
FILE *I__fp,
char **_O_line_A,
int *_O_eof
)
{
int rCode = 0; /* Function return status. 0=SUCCESS. */
char *line_A = NULL; /* Pointer to allocated memory containing line read. */
size_t lineLength = 0; /* Length of the allocated line, not including the termination character. */
int eof = 0; /* End-of-file encountered flag. */
for(;;) /* Loop forever. */
{
int ch;
char *newmem;
ch=fgetc(I__fp); /* Read a single character from the stream. */
if(EOF == ch) /* If the value EOF was read... */
{
eof=(-1); /* ...Set the end-of-file flag */
break; /* ...Break out of the for-loop */
}
if('\n' == ch) /* If a new-line character was read... */
break; /* ...Break out of the for-loop */
/* Allocate additional memory, sufficient to hold all characters so far read,
** plus one for the currently read character,
** plus one for the termination character.
*/
newmem=realloc(line_A, lineLength + 1 + 1);
if(!newmem) /* Ensure that realloc() did not fail. */
{
rCode=ENOMEM;
goto CLEANUP;
}
line_A = newmem; /* Change to newly allocated memory. */
line_A[lineLength++] = ch; /* Append the newly read character to the allocated memory. */
line_A[lineLength] = '\0'; /* Append a string termination character to the allocated memory. */
};
//RESULTS:
/* Set the caller's pointer to the allocated line read from the stream. */
if(_O_line_A) /* Allows the caller to pass in a NULL value for _O_line_A... */
{ /* ...For example, if the content of the line is not wanted by the caller. */
*_O_line_A = line_A;
line_A = NULL;
}
/* Set the caller's eof flag. */
if(_O_eof) /* Allows the caller to pass in a NULL value for _O_eof... */
*_O_eof = eof; /* ...For example, if the caller doesn't need the eof value. */
CLEANUP:
/* It is possible that the caller supplied a "NULL" value for _O_line_A.
** ...If so, line_A will not be NULL.\
** ...In that case, free line_A to eliminate potential memory leak.
*/
if(line_A)
free(line_A);
return(rCode);
}
/******************************************************************************
** Program start.
*/
int main(
int argc,
char **argv)
{
int rCode = 0;
char *line_A = NULL;
FILE *fp_A = NULL;
int eof;
errno=0;
fp_A = fopen(argv[1], "r");
if(!fp_A)
{
fprintf(stderr, "fopen(\"%s\") failed. errno: %d %s\n", argv[1], errno, strerror(errno));
goto CLEANUP;
}
rCode=ReadLine(fp_A, &line_A, &eof); /* Read first line from stream. */
while(!rCode)
{
printf("%s\n", line_A ? line_A : "");
free(line_A); /* Free line memory when finished with it. (after printing, etc.) */
if(eof) /* Break out of the while-loop if this is the last line in the stream. */
break;
rCode=ReadLine(fp_A, &line_A, &eof); /* Read next line from stream. */
}
if(rCode)
fprintf(stderr, "ReadLine() reports: %d %s\n", errno, strerror(errno));
CLEANUP:
if(fp_A && (fp_A != stdin))
{
errno=0;
if(EOF == fclose(fp_A))
fprintf(stderr, "fclose() failed. errno: %d %s\n", errno, strerror(errno));
}
return(rCode);
}

Read file word by word and output WITH white spaces

The input text file is like so:
Hello my
name is
mark.
and
im
going
to
love
c!
Code:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *pFile;
char x[60];
pFile = fopen("test0.txt","r");
if(pFile != NULL){
while(fscanf(pFile, " %60s", x) == 1){
printf("%s",x);
}
}
}
Output text file is:
Hellomynameismark.andimgoingtolovec!
I want the Output to be like:
Hello my name is mark. and im going to love c!
Very new C programmer so only know the basics.
Edit----
int main(int argc, char *argv[]){
FILE *pFile;
char x[60],line[60];
pFile = fopen("test0.txt","r");
while(!feof(pFile)){
fgets(line, 60, pFile);
strtok(line, "\r\n");
printf("%s", line );
}
fclose(pFile);
Output:
Hello myname is mark.andim goingtolovec!
This does not leave spaces between new lines. However if I take out the strtok line the output will be like this:
Hello my
name is
mark.
and
im
going
to
love
c!
--Edit
.sp 2
.ce
This is an example file for formatting.
.sp 1
The above line
was formatted using a .ce 1 command, which means 'centre
the following line',
so it should appear in the
centre of the page.
The paragraph was separated from the heading using
a .sp 1 command to create a single blank line.
There should also be two blank lines above the centred heading to make reading it slightly easier.
The simple answer is:
while(fscanf(pFile, " %59[^\n]%*c", x) == 1)
Here %[^\n] uses the character class [stuff] to read everything up to the newline. %*c simply reads and discards the newline without adding it to the match count for fscanf.
However for line-oriented input, you should really use one of the line-oriented functions provided by the standard library (e.g. fgets or POSIX getline).
Using fgets & strtok
As you have taken from the comment, the use of feof is going to cause you nothing but grief. You will want to simply use the return of fgets to determine end of file. Here is an example that puts all the pieces of the puzzle together:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXWDS 20
#define MAXCHR 60
int main (int argc, char **argv) {
char line[MAXCHR] = {0};
char *words[MAXWDS] = {NULL};
FILE *pFile = NULL;
size_t i, index = 0;
/* open file for reading (if provided), or read from stdin */
if (!(pFile = argc > 1 ? fopen (argv[1], "r") : stdin)) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (line, 60, pFile))
{
char *p = line;
/* split line into tokens, stored in words[] */
for (p = strtok (p, " \r\n"); p; p = strtok (NULL, " \r\n")) {
words[index++] = strdup (p); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
}
if (pFile != stdin) fclose (pFile);
/* output in a single line */
for (i = 0; i < index; i++) {
printf (" %s", words[i]);
free (words[i]); /* free allocated memory */
}
putchar ('\n');
return 0;
}
Compile
gcc -Wall -Wextra -o bin/fgets_strtok fgets_strtok.c
Output
$ ./bin/fgets_strtok dat/hellomark.txt
Hello my name is mark. and im going to love c!
Note: to simply print the line out with spaces between the words, as long as there is already a space between each of the words in each line, there is no reason to go to the trouble to separate each line into individual words, you can simply print the contents of each line out in a space separate fashion. The only issue you run into using fgets is that it will also read the newline (or carriage return, newline) as part of the string. That is simple to remove. You can replace the entire read loop with:
while (fgets (line, 60, pFile))
{
size_t len = strlen (line);
/* strip trailing newline (or carriage return newline ) */
while (len && (line[len-1] == '\n' || line[len-1] == '\r'))
line[--len] = 0; /* overwrite with null-terminating char */
words[index++] = strdup (line); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
Output
$ ./bin/fgets_mark <dat/hellomark.txt
Hello my name is mark. and im going to love c!
Standard Way to Read from File Only (not File or stdin)
I apologize for the getting ahead of you a bit by including a way to either open a file (if provided on the command line) or read from stdin (if no filename was provided). The standard way is to first check that the correct number of arguments were provided on the command line, and then open the filename provided, validate it is open, and then process input. What I did was throw a ternary operator into the fopen command that said.
pFile = argc > 1 ? fopen (argv[1], "r") : stdin
The right side of the '=' sign is a ternary operator, which is simply a shorthand for if -> then -> else. What it does is ask is argc > 1? If that tests true, then pFile = fopen (argv[1], "r");. If argc > 1 tests false, then pFile = stdin;
See if the standard way makes more sense:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXWDS 20
#define MAXCHR 60
int main (int argc, char **argv) {
char line[MAXCHR] = {0};
char *words[MAXWDS] = {NULL};
FILE *pFile = NULL;
size_t i, index = 0;
/* validate sufficient input */
if (argc < 2 ) {
fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
return 1;
}
/* open file provided on command line for reading */
pFile = fopen (argv[1], "r");
if (!pFile) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (line, 60, pFile)) /* read each line in file */
{
size_t len = strlen (line);
/* strip trailing newline (or carriage return newline ) */
while (len && (line[len-1] == '\n' || line[len-1] == '\r'))
line[--len] = 0; /* overwrite with null-terminating char */
words[index++] = strdup (line); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
if (pFile != stdin) fclose (pFile);
/* output in a single line */
for (i = 0; i < index; i++) {
printf (" %s", words[i]);
free (words[i]); /* free allocated memory */
}
putchar ('\n');
return 0;
}
A simple state machine does the trick - no line length limitation.
#include <stdio.h>
int main(void) {
FILE *pFile = fopen("test0.txt","r");
if(pFile != NULL) {
int previous_isspace = 0;
int ch;
for (;;) {
ch = fgetc(pFile);
if (ch == EOF) break;
if (isspace(ch)) {
previous_isspace = 1;
} else {
if (previous_isspace == 1) {
fputc(' ', stdout);
}
previous_isspace = 0;
fputc(ch, stdout);
}
}
fclose(pFile);
fputc('\n', stdout); // If code should have a \n at the end
}
}
I think, It is sufficient take a look am i miss anything.
if(pFile != NULL){
// while(fscanf(pFile, " %60s", x) == 1){
while (fgets(x, sizeof(x), pFile) != NULL) {
token = strtok(x,"\r\n");
if(token != NULL)
printf("%s ",x);
else
printf("%s",x);
}
fclose(pFile);
}

Resources