I want to write a program, that reads a very large csv file. I want the file to read the columns by name and then print the entirety of the column. However it only prints out one of the columns in the datalist. So it only prints out the unix timestamp columns out of the entirety of the program. I want the code to be able to print out the other columns as well Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
csv file:
Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
1605139200.0,2020-11-12,BTCUSD,15710.87,15731.73,15705.58,15710.01,1.655,26014.29
1605052800.0,2020-11-11,BTCUSD,15318,16000,15293.42,15710.87,1727.17,27111049.25
1604966400.0,2020-11-10,BTCUSD,15348.2,15479.49,15100,15318,1600.04,24521694.72
1604880000.0,2020-11-09,BTCUSD,15484.55,15850,14818,15348.2,2440.85,37356362.78
1604793600.0,2020-11-08,BTCUSD,14845.5,15672.1,14715.98,15484.55,987.72,15035324.13
Current code:
#include<stdio.h>
#include<stdlib.h>
void main()
{
char buffer[1001]; //get line
float timestampfile;
FILE *fp;
int i=1; //line
fp = fopen("filename.csv", "r"); //used to read csv
if(!fp)
{
printf("file not found"); //file not found
exit(0);
}
fgets(buffer,1000, fp); //read line
printf("Expected output print the first column:\n");
while(feof(fp) == 0)
{
sscanf(buffer,"%f",×tampfile); //read data line
printf("%d: %f\n",i,timestampfile); //used to print data
i++;
fgets(buffer, 1000, fp);
}
printf("end of the column");
fclose(fp);
}
Current output:
1: 1605139200.000000
2: 1605052800.000000
3: 1604966400.000000
4: 1604880000.000000
5: 1604793600.000000
end of the column
You have started out in the right direction, but you have stumbled a bit in handling separating the comma separated values. The standard C library provides all you need to handle separating the values.
Simple Implementation Using strtok()
The easiest implementation would be to take the filename to read and the index of column to extract as the first two arguments to your program. Then you could simply discard the heading row and output the requested value for the column index. That could be done with a simple loop that keeps track of the token number while calling strtok(). Recall on the first call to strtok() the variable name for the string is passed as the first parameter, ever successive call passes NULL as the first argument until no more tokens are found.
A short example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
#define DELIM ",\n"
int main (int argc, char **argv) {
if (argc < 3) { /* validate filename and column given as arguments */
fprintf (stderr, "usage: %s filename column\n", argv[0]);
return 1;
}
char buf[MAXC]; /* buffer to hold line */
size_t ndx = strtoul (argv[2], NULL, 0); /* column index to retrieve */
FILE *fp = fopen (argv[1], "r"); /* file pointer */
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!fgets (buf, MAXC, fp)) { /* read / discard headings row */
fputs ("error: empty file.\n", stderr);
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read / validate each line */
char *p = buf;
size_t i = 0;
/* loop until the ndx token found */
for (p = strtok(p, DELIM); p && i < ndx; p = strtok (NULL, DELIM))
i++;
if (i == ndx && p) /* validate token found */
puts (p);
else { /* handle error */
fputs ("error: invalid index\n", stderr);
break;
}
}
}
(note: strtok() considers multiple delimiters as a single delimiter. It cannot be used when empty fields are a possibility such as field1,field2,,field4,.... strsep() was suggested as a replacement for strtok() and it does handle empty-fields, but has shortcomings of its own.)
Example Use/Output
first column (index 0):
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 0
1605139200.0
1605052800.0
1604966400.0
1604880000.0
1604793600.0
second column (index 1)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 1
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
thrid column (index 2)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 2
BTCUSD
BTCUSD
BTCUSD
BTCUSD
BTCUSD
forth column (index 3)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 3
15710.87
15318
15348.2
15484.55
14845.5
request out of range:
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 9
error: invalid index
More Involved Example Displaying Headings as Menu
If you wanted to provide a short interface for the user to choose which column to output, you could count the columns available. You can determine the number of commas present (and adding one more provides the number of columns). You can then save the headings to allow the user to select which column to output by allocating column number of pointers and then by allocating storage for each heading and copying the heading to the storage. You can then display the headings as a menu for the user to select from.
After determining which column to print, you simply read each line into your buffer, and then tokenize the line with either strtok() or strcspn() (the downside to strtok() is that it modifies the buffer, so if you need to preserve it, make a copy). strcspn() returns the length of the token, so it provides the advantage of not modifying the original and providing the number of characters in the token. Then you can output the column value and repeat until you run out of lines.
An example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
int main (int argc, char **argv) {
char buf[MAXC], *p = buf, **headings = NULL;
size_t cols = 1, ndx = 0, nchr;
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!fgets (buf, MAXC, fp)) { /* read / validate headings row */
fputs ("error: empty file.\n", stderr);
return 1;
}
while (*p && (p = strchr (p, ','))) { /* loop counting ',' */
cols++;
p++;
}
p = buf; /* reset p to start of buf */
/* allocate cols pointers for headings */
if (!(headings = malloc (cols * sizeof *headings))) {
perror ("malloc-heading pointers");
return 1;
}
/* loop separating headings, allocate/assign storage for each, copy to storage */
while (*p && *p != '\n' && (nchr = strcspn (p, ",\n"))) {
if (!(headings[ndx] = malloc (nchr + 1))) { /* allocate/validate */
perror ("malloc headings[ndx]");
return 1;
}
memcpy (headings[ndx], p, nchr); /* copy to storage */
headings[ndx++][nchr] = 0; /* nul-terminate */
p += nchr+1; /* advance past ',' */
}
if (ndx != cols) { /* validate ndx equals cols */
fputs ("error: mismatched cols & ndx\n", stderr);
return 1;
}
puts ("\nAvailable Columns:"); /* display available columns */
for (size_t i = 0; i < cols; i++)
printf (" %2zu) %s\n", i, headings[i]);
while (ndx >= cols) { /* get / validate selection */
fputs ("\nSelection: ", stdout);
if (!fgets (buf, MAXC, stdin)) { /* read input (same buffer) */
puts ("(user canceled input)");
return 0;
}
if (sscanf (buf, "%zu", &ndx) != 1 || ndx >= cols) /* convert/validate */
fputs (" error: invalid index.\n", stderr);
}
printf ("\n%s values:\n", headings[ndx]); /* display column name */
while (fgets (buf, MAXC, fp)) { /* loop displaying column */
char column[MAXC];
p = buf;
/* skip forward ndx ',' */
for (size_t col = 0; col < ndx && (p = strchr (p, ',')); col++, p++) {}
/* read column value into column */
if ((nchr = strcspn (p, ",\n"))) {
memcpy (column, p, nchr); /* copy */
column[nchr] = 0; /* nul-terminate */
puts (column); /* output */
}
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
for (size_t i = 0; i < cols; i++) /* free all allocated memory */
free (headings[i]);
free (headings);
}
Example Use/Output
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 1
Date values:
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
Or the open values:
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 3
Open values:
15710.87
15318
15348.2
15484.55
14845.5
Column out of range canceling input with Ctrl + d (Ctrl + z on windows):
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 9
error: invalid index.
Selection: (user canceled input)
Both approaches accomplish the same thing, it all depends on your program needs. Look things over and let me know if you have further questions.
In order to extract more than one field by name, you must get the names of the fields to extract, for example as command line arguments, determine the corresponding columns, and for each line of the CSV file, output the requested columns.
Below is a simple program that extracts columns from a CSV file and produces another CSV file. It does not use strtok() nor strchr() but analyses the line one character at a time to find the starting and ending offset of the columns and acts accordingly. The source file is passed as redirected input and the output can be redirected to a different CSV file.
Here is the code:
#include <stdio.h>
#include <string.h>
int find_header(const char *line, const char *name) {
int len = strlen(name);
int i, n, s;
for (i = n = s = 0;; i++) {
if (line[i] == ',' || line[i] == '\n' || line[i] == '\0') {
if (len == i - s && !memcmp(line + s, name, len))
return n;
if (line[i] != ',')
return -1;
s = i + 1;
n++;
}
}
}
int main(int argc, char *argv[]) {
char buffer[1002];
int field[argc];
char *name[argc];
int i, n;
if (argc < 2) {
printf("usage: csvcut FIELD1 [FIELD2 ...] < CSVFILE\n");
return 2;
}
// read the input header line
if (!fgets(buffer, sizeof buffer, stdin)) {
fprintf(stderr, "missing header line\n");
return 1;
}
// determine which columns to extract
for (n = 0, i = 1; i < argc; i++) {
int f = find_header(buffer, argv[i]);
if (f < 0) {
fprintf(stderr, "field not found: %s\n", argv[i]);
} else {
name[n] = argv[i];
field[n] = f;
n++;
}
}
// output new header line
for (i = 0; i < n; i++) {
if (i > 0)
putchar(',');
printf("%s", name[i]);
}
putchar('\n');
// parse the records, output the selected fields
while (fgets(buffer, sizeof buffer, stdin)) {
for (i = 0; i < n; i++) {
int j, s, f, start, length;
if (i > 0)
putchar(',');
// find field boundaries
for (j = s = f = start = length = 0;; j++) {
if (buffer[j] == ',' || buffer[j] == '\n' || buffer[j] == '\0') {
if (f == field[i]) {
start = s;
length = j - s;
break;
}
if (buffer[j] != ',')
break;
s = j + 1;
f++;
}
}
printf("%.*s", length, buffer + start);
}
putchar('\n');
}
return 0;
}
Sample run:
./csvcut Date Close < sample.csv
Date,Close 2020-11-12,15710.01
2020-11-11,15710.87
2020-11-10,15318
2020-11-09,15348.2
2020-11-08,15484.55
Note that fields cannot contain embedded commas. The program could be extended to handle quoted contents to support these.
Related
I need to sum up the numbers from each line in the file like this e.g.:
1 2 3
10 -1 -3
and the result I should write to another file in each line likes this:
6
6
And I have the problem when in each line after the last number in reading file have more spaces, for example, maybe I use the '_' to show this problem:
When my function works:
10_11_12 '\n'
1_2_3 '\n'
and when my function doesn't work:
10_11_12_ _ _ '\n'
1_2_3 '\n'
I think I know where is the problem, but I have no idea how to fix it.
It's my function here:
int num=0;
char s;
while(fscanf(file, "%d", &num)==1){
fscanf(file, "%c", &s);
sum+=num;
if(s=='\n'){
fprintf(res_file, "%d\n", sum);
sum=0;
}
}
The problem is that fscanf is expecting a pointer to a char. Within your function, you are using a regular char, s.
char s;
You can fix your issue by making s a pointer. First, Allocate memory.
char *s = malloc(sizeof(char) + 1);
Now we can properly scan into the variable, s, and then check for the newline character. The only difference here is now we check for the newline by dereferencing s.
if (*s == '\n')
Don't forget to clean up the memory leak with free()!
free(s);
I was able to get the desired output using the code below.
#include <stdio.h>
#include <stdlib.h>
int processInputFile(char *filename)
{
FILE *ifp;
int buffer = 0;
char *newline = malloc(sizeof(char) + 1);
int sum = 0;
if ((ifp = fopen(filename, "r")) == NULL)
{
fprintf(stderr, "Failed to open \"%s \" in processInputFile.\n", filename);
return -1;
}
while(fscanf(ifp, "%d", &buffer) == 1)
{
fscanf(ifp, "%c", newline);
sum += buffer;
if (*newline == '\n')
{
printf("%d\n", sum);
sum = 0;
}
}
free (newline);
fclose(ifp);
}
int main(int argc, char **argv)
{
if (argc < 2)
{
printf("Proper syntax: ./a.out <n>\n");
return -1;
}
processInputFile(argv[1]);
return 0;
}
Any kind of line-by-line processing in C is easier done by reading the line first, and then processing it. fgets(3) handles end-of-line for you; then you just need to scan what it read. Plus, in the real world, some lines won't scan: either they'll have errors, or your scan won't be general enough. When that happens, it's awfully handy to write the input to standard error, so you can see what you're looking at.
Here's a complete program that does what you want. It assumes lines are less than 80 bytes long and doesn't protect against invalid input, though.
#include <stdio.h>
#include <err.h>
int main( int argc, char *argv[] ) {
char line[80];
static const char *filename = "sum.dat";
FILE *input;
if( (input = fopen(filename, "r")) == NULL ) {
err(1, "could not open %s", filename);
}
for( int nlines = 0;
fgets(line, sizeof(line), input) != NULL;
nlines++ )
{
double value, sum = 0;
int n;
for( char *p = line; sscanf(p, "%lf%n", &value, &n) > 0; p += n ) {
sum += value;
}
printf( "line %d: sum = %lf\n", nlines, sum );
}
return 0;
}
Reading with a line-oriented input function like fgets() or POSIX getline() ensures that a complete line of input is consumed on each call. (don't skimp on buffer size). strtol was created to convert an unknown number of values per-line into long. You walk-a-pointer down your buffer by utilizing the endptr parameter filled by strtol after a successful conversion to point to the next character after the last digit converted.
This allows a simple method to use a pair of pointers, p your start-pointer and ep your end-pointer to work through an entire line converting values as you go. The basic approach is to call strtol, validate it succeeded, and then set p = ep; to advance to the start of your next conversion. strtol ignores leading whitespace.
Putting it altogether, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
/* (don't skimp on buffer-size) */
int main (int argc, char **argv) {
char buf[MAXC]; /* buffer to hold each line read */
size_t n = 0; /* line-counter */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read each line */
char *p = buf, *ep = p; /* pointer and end-pointer */
int sum = 0; /* variable to hold sum */
if (*buf == '\n') /* ignore empty lines */
continue;
while (*p && *p != '\n') {
errno = 0;
long tmp = strtol (p, &ep, 0); /* convert to temp long */
if (p == ep) { /* validate digits were converted */
fputs ("error: no digits extracted.\n", stderr);
break;
}
else if (errno) { /* validate no under/overflow occurred */
fputs ("error: underflow/overflow occurred.\n", stderr);
break;
}
else if (tmp < INT_MIN || INT_MAX < tmp) { /* validate in range */
fputs ("error: tmp exceeds range of int.\n", stderr);
break;
}
sum += tmp; /* add tmp to sum */
p = ep; /* set p to end-ptr (one past last digit used) */
}
n++; /* advance line counter */
printf ("sum line [%2zu] : %d\n", n, sum); /* output sum */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
(note: the if (*buf == '\n') which tests if the first character in the line is a newline character and simple skips to the next line, no need to worry about converting values in a empty line)
Example Use/Output
Using your data in dat/sumlines.txt produces the expected results.
$ ./bin/sumline dat/sumlines.txt
sum line [ 1] : 6
sum line [ 2] : 6
Let me know if you have further questions.
i just need help of how to extract comments from input text file and paste them in output file using C language in Unix command line. I don't need the code. Just give me favor of guiding me please. Here is what I want.
Input:
If the input file input_0.txt contains
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}
Output:
Then the execution of the program would be as follows.
$ ./Comments < input_0.txt
This is a single-line C comment
This is a nicely formatted
multi-line comment.
This is a C++ comment.
Here is my code which i modified from the Respected #David C. Rankin's code.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
int main (int argc, char **argv) {
/* Variables for removing comments*/
int ch, i = 0, flag = 0, prev = '\0';
FILE *fp1, *fp2;
char fname[MAX], temp[] = "temp.txt";
/* Variables for removing comments*/
int inmulti = 0,
insingle = 0,
longline = 0;
char buf[MAXC] = "";
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
/* validate file open for reading */
if (!fp) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
/* open the temporary file in write mode */
fp2 = fopen(temp, "w");
/* error handling */
if (!fp2) {
printf("Unable to open temporary file!!\n");
return 0;
}
while (fgets (buf, MAXC, fp)) { /* read upto MAXC into buf */
char *p = buf; /* pointer to buf */
size_t len = strlen (buf); /* get length */
if (longline) { /* is this 2nd read of long line? */
if (insingle) { /* are we in a single comment? */
printf ("%s", buf); /* print it, get next buf */
continue;
}
else /* otherwise, reset insingle flag */
insingle = 0;
}
if (inmulti) { /* are we in a multi-line comment? */
/* (note: you need to check if quoted here) */
if (strstr (buf, "*/")) { /* does buf contain ending? */
inmulti = 0; /* reset inmulti comment */
}
printf ("%s", buf); /* print the line */
continue; /* (note: end can be before end of line) */
}
if (len && buf[len-1] != '\n') /* check if end of line read */
longline = 1; /* if not, set longline */
else
longline = 0; /* or, reset it */
while (*p && *p != '/') p++; /* find start (or end) of comment */
if (!*p) continue; /* none found, get next buf */
if (*(p + 1) == '/') { /* start of single line comment */
/* note: must make sure not part of path here */
insingle = 1; /* set single-line comment flag */
printf ("%s", buf); /* print line */
} /* note: can print from p for comment only */
else if (*(p + 1) == '*') { /* start of multiline comment */
if (!strstr (p + 2, "*/")) { /* check for ending */
inmulti = 1; /* set multiline flag */
}
printf ("%s", buf); /* print the line */
} /* note: can print from p for comment only */
else if (p > buf && *(p - 1) == '*') { /* was / end of multi? */
printf ("%s", buf); /* end of multi line comment */
inmulti = 0;
}
}
rewind(fp);
/* removes comments from the given input file */
prev = fgetc(fp);
while ((ch = fgetc(fp)) != EOF) {
/* flag is 1 - double slash comment */
if (flag == 1) {
/* skip the contents until you detect \n */
if (ch == '\n') {
flag = 0;
prev = fgetc(fp);
}
continue;
}
/* flag is 2 - slash arsterix comment */
if (flag == 2) {
/* skip the contents until you detect asterix slash */
if (ch == '/' && prev == '*') {
flag = 0;
prev = fgetc(fp);
}
continue;
}
/* checking for double slash comment */
if (ch == '/' && prev == '/') {
flag = 1;
} else if (prev == '/' && ch == '*') {
/* slash asterix comment */
flag = 2;
} else {
/* contents outside of comments */
fputc(prev, fp2);
}
prev = ch;
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
/* closing the input file */
fclose(fp);
fclose(fp2);
return 0;
}
Note, to do this correctly, there are many more conditions that need to be checked for (such as "//", "/*" or "*/" appearing as part of a path, or within a string). Making use of regular expressions is also another way to approach this.
If I understand you correctly, and you a looking to parse comment lines of a source file using basic C, then the following is a quick example of reading all lines in a file (which is provided as the 1st argument, or on stdin) and looking for single-line or multi-line comment delimiters.
This is not intended to be complete and cover all corner-cases or cases where the delimiters appear within literals, defines, etc.., but some care has been taken to note where additional code should be added to address those concerns.
The basic approach is to read a line in MAXC (1024 byte chunks) and keeping track of 3 flags. longline meaning the line exceeds MAXC chars and you have read the 2nd (or 3rd, or 4th...) buffer full. inmulti tracking whether you are in a multi-line comment. Finally insingle where you are within a single-line comment that may exceed MAXC chars. The read loops checks and acts based upon the state of the flags, while looking for the end of a multi-line comment (if within one). The code also checks for multi-line comment start and end -- all within one line.
Given those qualifications, you could start with something like:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
int main (int argc, char **argv) {
int inmulti = 0,
insingle = 0,
longline = 0;
char buf[MAXC] = "";
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read upto MAXC into buf */
char *p = buf; /* pointer to buf */
size_t len = strlen (buf); /* get length */
if (longline) { /* is this 2nd read of long line? */
if (insingle) { /* are we in a single comment? */
printf ("%s", buf); /* print it, get next buf */
continue;
}
else /* otherwise, reset insingle flag */
insingle = 0;
}
if (inmulti) { /* are we in a multi-line comment? */
/* (note: you need to check if quoted here) */
if (strstr (buf, "*/")) { /* does buf contain ending? */
inmulti = 0; /* reset inmulti comment */
}
printf ("%s", buf); /* print the line */
continue; /* (note: end can be before end of line) */
}
if (len && buf[len-1] != '\n') /* check if end of line read */
longline = 1; /* if not, set longline */
else
longline = 0; /* or, reset it */
while (*p && *p != '/') p++; /* find start (or end) of comment */
if (!*p) continue; /* none found, get next buf */
if (*(p + 1) == '/') { /* start of single line comment */
/* note: must make sure not part of path here */
insingle = 1; /* set single-line comment flag */
printf ("%s", buf); /* print line */
} /* note: can print from p for comment only */
else if (*(p + 1) == '*') { /* start of multiline comment */
if (!strstr (p + 2, "*/")) { /* check for ending */
inmulti = 1; /* set multiline flag */
}
printf ("%s", buf); /* print the line */
} /* note: can print from p for comment only */
else if (p > buf && *(p - 1) == '*') { /* was / end of multi? */
printf ("%s", buf); /* end of multi line comment */
inmulti = 0;
}
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
Example Input File
$ cat dat/comments.txt
/* This is a single-line C comment */
#include <stdio.h>
/******
* This is a nicely formatted
* multi-line comment.
******/
int main(int argc, char **argv)
{
// This is a C++ comment.
}
Example Use/Output
$ ./bin/comments <dat/comments.txt
/* This is a single-line C comment */
/******
* This is a nicely formatted
* multi-line comment.
******/
// This is a C++ comment.
note: the worth in an exercise such as this is in the learning value stepping though a long string identifying certain individual characters, as well as in handling various flags and program states as you loop though a file.
Reading Character-by-Character
To switch from a line-oriented approach to a character-oriented approach (and adding a couple of states noted in the comments from chux), you will read the first character (save it), and then read the remaining characters in the file. This provides a way of comparing the previous to current to determine if you are within or without a single-line comment, a multi-line comment or single or double quotes.
Again, this isn't intended to catch every corner case, but the output was updated to not print the opening or closing comment delimiters. (you will need to tweak the printing of * within multi-line comments and quotes within comments to your taste).
Changing from reading with fgets to fgetc, you could do something similar to the following:
#include <stdio.h>
int main (int argc, char **argv) {
int inmulti = 0, /* in multi-line comment flag */
insingle = 0, /* in single-line comment flag */
insquo = 0, /* within single-quotes */
indquo = 0, /* within double-quotes */
c, prev = 0;
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if ((prev = fgetc(fp)) == EOF) /* read 1st char */
return 1;
while ((c = fgetc(fp)) != EOF) { /* read remaining */
switch (c) { /* switch on c */
case '/':
if (prev == '/' && !(insquo | indquo))
insingle = 1;
if (prev == '*' && !(insquo | indquo))
inmulti = 0;
break;
case '*':
if (prev == '/' && !(insquo | indquo))
inmulti = 1;
break;
case '\n':
insingle = 0;
if (insingle || inmulti)
putchar (c);
break;
case '\'':
insquo = insquo ? 0 : 1;
break;
case '"':
indquo = indquo ? 0 : 1;
break;
default:
if ((insingle || inmulti) && !(insquo | indquo))
putchar (c);
break;
}
prev = c;
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
putchar ('\n'); /* tidy up with newline */
return 0;
}
Example Use/Output
$ ./bin/commentsfgetc <dat/comments.txt
This is a single-line C comment
This is a nicely formatted
multi-line comment.
This is a C++ comment.
Look things over and let me know if you have questions over how characters are being identified or the code controlled to locate the start and end of the comment blocks.
You can use this shell script to do that, and save to a file comments.txt
cat generic.c | awk '/\/\// {print $0}; /\/\*/ {aux=1}; {if(aux) print $0}; /\*\// {aux=0}' > comments.txt
Good lucky
So i'm working on a class project that is going to manage a stores inventory. The program will need to remove/add products as well as update stock counts. When the program runs the structure array needs to be initialized by a inventory file that its values are separated by commas which saves all the data after each use.
Here is the file data.
1000,1.49,3.79,10,0,Fish Food
2000,0.29,1.59,100,1,Angelfish
2001,0.09,0.79,200,1,Guppy
5000,2.40,5.95,10,0,Dog Collar (Large)
6000,49.99,129.99,3,1,Dalmatian Puppy
Here is the structure layout.
struct inventory_s
{
int productNumber;
float mfrPrice;
float retailPrice;
int numInStock;
char liveInv;
char productName[PRODUCTNAME_SZ];
};
And here is the code
int fileData()
{
FILE* pFile;
char *buf = malloc(MAX_INVENTORY); // MAX INVENTORY = 50
char *info;
if ( ( pFile = fopen( "inventory.txt", "r" ) ) == NULL ) //Reading a file
{
printf( "File could not be opened.\n" );
}
int i = 0;
while (fgets(buf, MAX_INVENTORY, pFile) != NULL)
{
if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
buf[strlen (buf) - 1] = '\0';
info = strtok(buf, ",");
inventory[i].productNumber = atoi(info);
info = strtok(buf, ",");
inventory[i].mfrPrice = atof(info);
info = strtok(NULL, ",");
inventory[i].retailPrice = atof(info);
info = strtok(NULL, ",");
inventory[i].numInStock = atoi(info);
info = strtok(NULL, ",");
strcpy(inventory[i].liveInv, info);
info = strtok(NULL, ",");
strcpy(inventory[i].productName, info);
printf("%i, %f, %f, %i, %s, %s \n", inventory[i].productNumber , inventory[i].mfrPrice, inventory[i].retailPrice , inventory[i].numInStock, inventory[i].liveInv, inventory[i].productName );
i++;
}
fclose(pFile);
return 0;
}
Right now I dont get anything if I run the code, but if I run only up to the first initialization
info = strtok(buf, ",");
inventory[i].productNumber = atoi(info);
I get all the first values assigned correctly.
Continuing from the comment, It is hard to tell without seeing your entire code where the issue may be. However, the thing that jumps out is there is no need to strip the trailing '\n' from buf before calling strtok. You are better served by using a delimiter of ",\n" and letting strtok handle the final string in each line.
A short example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { PRODUCTNAME_SZ = 32, MAXINV = 128, MAXC = 512 };
typedef struct {
int productNumber;
float mfrPrice;
float retailPrice;
int numInStock;
char liveInv;
char productName[PRODUCTNAME_SZ];
} inventory_s;
int main (int argc, char **argv) {
inventory_s *inv = NULL;
char buf[MAXC] = "", *delim = ",\n";
size_t allocsz = MAXINV, idx = 0;
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if (!(inv = malloc (sizeof *inv * allocsz))) {
fprintf (stderr, "error: virtual memory exhausted.\n");
return 1;
}
while (fgets (buf, MAXC, fp)) { /* read each line */
char *p = buf;
size_t val = 0; /* tokenize line with strtok */
for (p = strtok (p, delim); p; p = strtok (NULL, delim))
switch (val) { /* switch controls assignment */
case 0 : inv[idx].productNumber = (int)strtol (p, NULL, 10);
val++; break;
case 1 : inv[idx].mfrPrice = strtof (p, NULL);
val++; break;
case 2 : inv[idx].retailPrice = strtof (p, NULL);
val++; break;
case 3 : inv[idx].numInStock = (int)strtol (p, NULL, 10);
val++; break;
case 4 : inv[idx].liveInv = (char)strtol (p, NULL, 10);
val++; break;
case 5 : strncpy (inv[idx].productName, p, PRODUCTNAME_SZ);
inv[idx].productName[PRODUCTNAME_SZ-1] = 0; break;
}
if (val != 5) { /* validate all struct members filled */
fprintf (stderr, "error: incomplete parse of values line: %zu\n",
idx+1);
return 1;
}
if (++idx == allocsz) { /* realloc when allocsz reached */
void *tmp = realloc (inv, (allocsz + MAXINV) * sizeof *inv);
if (!tmp) {
fprintf (stderr, "error: realloc - virtual memory exhausted.\n");
break; /* leave read loop preserving exhisting data in inv */
}
inv = tmp;
allocsz += MAXINV;
}
}
for (size_t i = 0; i < idx; i++)
printf ("%5d %6.2f %7.2f %4d %d %s\n", inv[i].productNumber,
inv[i].mfrPrice, inv[i].retailPrice, inv[i].numInStock,
inv[i].liveInv, inv[i].productName);
free (inv); /* free allocated memory */
if (fp != stdin) fclose (fp); /* close file if not stdin */
return 0;
}
note: if your liveInv value is really just a single digit, then you can replace (char)strtol (p, NULL, 10); with simply *p - '0';.
(You may need to set PRODUCTNAME_SZ larger depending on your full dataset)
Example Use/Output
$ ./bin/inventory <dat/inventory.txt
1000 1.49 3.79 10 0 Fish Food
2000 0.29 1.59 100 1 Angelfish
2001 0.09 0.79 200 1 Guppy
5000 2.40 5.95 10 0 Dog Collar (Large)
Look it over and let me know if you have any questions.
I'm a Java Programmer but Now Building Its so called Java Collections in C &C++. This code is a Part of My Project "Big Data Analysis Using C & C++".
I already had the code, so, modified for your data. It can take any no. of columns. Just pass opened file pointer & separation type.
You can get a particular column just by removing all the if part of j (ex. if(j==1), etc) to your particular column. Most people forget this. :-D
#include <iostream>
#include <stdio.h>
int MAX=1000; // Size of Single String
void analyse(FILE *fp, char separation) {
/* fp is file pointer (already opened)
* separation is how data is separated (' ' or '\t' or ',')
*/
char str[MAX],ch;
int i=-1;
long j=0;
// J is Column. This Can Be Applied For Any No. of Columns.
cout<<"Reading File.."<<endl;
while((ch=getc(fp))!=EOF) {
i++;
if(ch!=separation && ch!='\n')
str[i] = ch;
else {
str[i] = '\0';
if(ch == separation || ch == '\n') {
j++;
}
// At Each if of j, you can write you code to store them.
if(j == 1) {
cout<<"Product No. : "<<str<<endl;
}
if(j == 2) {
cout<<"MFR Price : "<<str<<endl;
}
if(j == 3) {
cout<<"Retail : "<<str<<endl;
}
if(j == 4) {
cout<<"Num in Stock : "<<str<<endl;
}
if(j == 5) {
cout<<"LiveInv : "<<str<<endl;
}
if(j == 6) {
cout<<"Product Name : "<<str<<endl;
}
cout<<"-------------------------------------"<<endl;
if(ch == '\n')
j = 0;
i = -1;
}
}
cout<<"Reading Completed.."<<endl;
}
The input text file is like so:
Hello my
name is
mark.
and
im
going
to
love
c!
Code:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *pFile;
char x[60];
pFile = fopen("test0.txt","r");
if(pFile != NULL){
while(fscanf(pFile, " %60s", x) == 1){
printf("%s",x);
}
}
}
Output text file is:
Hellomynameismark.andimgoingtolovec!
I want the Output to be like:
Hello my name is mark. and im going to love c!
Very new C programmer so only know the basics.
Edit----
int main(int argc, char *argv[]){
FILE *pFile;
char x[60],line[60];
pFile = fopen("test0.txt","r");
while(!feof(pFile)){
fgets(line, 60, pFile);
strtok(line, "\r\n");
printf("%s", line );
}
fclose(pFile);
Output:
Hello myname is mark.andim goingtolovec!
This does not leave spaces between new lines. However if I take out the strtok line the output will be like this:
Hello my
name is
mark.
and
im
going
to
love
c!
--Edit
.sp 2
.ce
This is an example file for formatting.
.sp 1
The above line
was formatted using a .ce 1 command, which means 'centre
the following line',
so it should appear in the
centre of the page.
The paragraph was separated from the heading using
a .sp 1 command to create a single blank line.
There should also be two blank lines above the centred heading to make reading it slightly easier.
The simple answer is:
while(fscanf(pFile, " %59[^\n]%*c", x) == 1)
Here %[^\n] uses the character class [stuff] to read everything up to the newline. %*c simply reads and discards the newline without adding it to the match count for fscanf.
However for line-oriented input, you should really use one of the line-oriented functions provided by the standard library (e.g. fgets or POSIX getline).
Using fgets & strtok
As you have taken from the comment, the use of feof is going to cause you nothing but grief. You will want to simply use the return of fgets to determine end of file. Here is an example that puts all the pieces of the puzzle together:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXWDS 20
#define MAXCHR 60
int main (int argc, char **argv) {
char line[MAXCHR] = {0};
char *words[MAXWDS] = {NULL};
FILE *pFile = NULL;
size_t i, index = 0;
/* open file for reading (if provided), or read from stdin */
if (!(pFile = argc > 1 ? fopen (argv[1], "r") : stdin)) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (line, 60, pFile))
{
char *p = line;
/* split line into tokens, stored in words[] */
for (p = strtok (p, " \r\n"); p; p = strtok (NULL, " \r\n")) {
words[index++] = strdup (p); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
}
if (pFile != stdin) fclose (pFile);
/* output in a single line */
for (i = 0; i < index; i++) {
printf (" %s", words[i]);
free (words[i]); /* free allocated memory */
}
putchar ('\n');
return 0;
}
Compile
gcc -Wall -Wextra -o bin/fgets_strtok fgets_strtok.c
Output
$ ./bin/fgets_strtok dat/hellomark.txt
Hello my name is mark. and im going to love c!
Note: to simply print the line out with spaces between the words, as long as there is already a space between each of the words in each line, there is no reason to go to the trouble to separate each line into individual words, you can simply print the contents of each line out in a space separate fashion. The only issue you run into using fgets is that it will also read the newline (or carriage return, newline) as part of the string. That is simple to remove. You can replace the entire read loop with:
while (fgets (line, 60, pFile))
{
size_t len = strlen (line);
/* strip trailing newline (or carriage return newline ) */
while (len && (line[len-1] == '\n' || line[len-1] == '\r'))
line[--len] = 0; /* overwrite with null-terminating char */
words[index++] = strdup (line); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
Output
$ ./bin/fgets_mark <dat/hellomark.txt
Hello my name is mark. and im going to love c!
Standard Way to Read from File Only (not File or stdin)
I apologize for the getting ahead of you a bit by including a way to either open a file (if provided on the command line) or read from stdin (if no filename was provided). The standard way is to first check that the correct number of arguments were provided on the command line, and then open the filename provided, validate it is open, and then process input. What I did was throw a ternary operator into the fopen command that said.
pFile = argc > 1 ? fopen (argv[1], "r") : stdin
The right side of the '=' sign is a ternary operator, which is simply a shorthand for if -> then -> else. What it does is ask is argc > 1? If that tests true, then pFile = fopen (argv[1], "r");. If argc > 1 tests false, then pFile = stdin;
See if the standard way makes more sense:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXWDS 20
#define MAXCHR 60
int main (int argc, char **argv) {
char line[MAXCHR] = {0};
char *words[MAXWDS] = {NULL};
FILE *pFile = NULL;
size_t i, index = 0;
/* validate sufficient input */
if (argc < 2 ) {
fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
return 1;
}
/* open file provided on command line for reading */
pFile = fopen (argv[1], "r");
if (!pFile) {
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (fgets (line, 60, pFile)) /* read each line in file */
{
size_t len = strlen (line);
/* strip trailing newline (or carriage return newline ) */
while (len && (line[len-1] == '\n' || line[len-1] == '\r'))
line[--len] = 0; /* overwrite with null-terminating char */
words[index++] = strdup (line); /* allocate & copy */
if (index == MAXWDS) /* check pointer limit */
break;
}
if (pFile != stdin) fclose (pFile);
/* output in a single line */
for (i = 0; i < index; i++) {
printf (" %s", words[i]);
free (words[i]); /* free allocated memory */
}
putchar ('\n');
return 0;
}
A simple state machine does the trick - no line length limitation.
#include <stdio.h>
int main(void) {
FILE *pFile = fopen("test0.txt","r");
if(pFile != NULL) {
int previous_isspace = 0;
int ch;
for (;;) {
ch = fgetc(pFile);
if (ch == EOF) break;
if (isspace(ch)) {
previous_isspace = 1;
} else {
if (previous_isspace == 1) {
fputc(' ', stdout);
}
previous_isspace = 0;
fputc(ch, stdout);
}
}
fclose(pFile);
fputc('\n', stdout); // If code should have a \n at the end
}
}
I think, It is sufficient take a look am i miss anything.
if(pFile != NULL){
// while(fscanf(pFile, " %60s", x) == 1){
while (fgets(x, sizeof(x), pFile) != NULL) {
token = strtok(x,"\r\n");
if(token != NULL)
printf("%s ",x);
else
printf("%s",x);
}
fclose(pFile);
}
I'm writing a program that uses the command-line arguments to receive the name of a text file from the user. The text file is a very simple CSV file such as:
Bob's experiment,12,14,15,16
Mary's experiment,16,15,18
I just want it to print the experiment name then the average of all the numerical values. I'm attempting to do this by putting all the numbers and commas into a char array and I don't know where I've gone wrong.
This is what I have:
int main(int argc, char *argv[])
{
if(argc == 2) {
FILE *txt_file;
txt_file=fopen(argv[1], "rt");
char str[4096];
if(!txt_file) {
printf("File does not exist.\n");
return 1;
}
while(!feof(txt_file)){
char s;
s = fgetc(txt_file);
//prints experiment name
if(s != ',' && (!isdigit(s))) {
printf("%c", s);
}
if(isdigit(s) || s == ',') {
fgets(str, 4096, txt_file);
}
}
fclose(txt_file);
return 0;
}
There are a number of ways to do this, but you should tailor your input routine to the type of data you are reading from your file. Here you are reading lines of data, so you should focus on line-oriented input routines (fgets, getline, or a shoehorned scanf). The basic approach is to read a line of input from your file into a buffer and then parse the line as needed. You can do this dynamically allocating all storage needed, or you can define a maximum value that should be large enough to handle your data.
Next you will need to parse the buffer read from the file to get the experiment name and each of the values associated so that an average can be calculated. Again, there are many ways to do this, but strtol is tailor made for this purpose. It takes a pointer to the string to convert and returns an endptr to the next character that is not a number. This allows you to read a values and set pointer = endptr+1 which sets you up to read your next number.
I have put these pieces together in the example below. It is commented to help you follow along. Drop a comment if you have any additional questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXEXPS 256
int main (int argc, char* argv[])
{
if (argc < 2) {
fprintf (stderr, "error: insufficient input. Usage %s <filename>\n", argv[0]);
return 1;
}
char *line = NULL; /* line read from file (getline allocates if NULL) */
size_t n = 0; /* number of characters to read (0 - no limit) */
ssize_t nchr = 0; /* number of characters actually read by getline */
char *p = NULL; /* pointer to use parsing values from line */
char *lnp = NULL; /* second pointer to use parsing values from line */
char *expname[MAXEXPS] = {0}; /* array of MAXEXPS pointers for experiment names */
int expavg[MAXEXPS] = {0}; /* array of MAXEXPS ints to hold averages */
int val = 0; /* val returned by each call to strtol */
int eidx = 0; /* experiment index */
int idx = 0; /* value index */
FILE *txt_file = fopen(argv[1], "r");
if (!txt_file) {
fprintf (stderr, "error: unable to open file '%s'\n", argv[1]);
return 1;
}
while ((nchr = getline (&line, &n, txt_file)) != -1) /* read each line in file */
{
p = strchr (line, ','); /* find first ',' */
*p = 0; /* set it to null (zero) */
expname[eidx] = strdup (line); /* copy exp name to array (strdup allocates) */
lnp = ++p; /* set lnp to next char */
int sum = 0; /* reset sum to 0 */
idx = 0; /* reset idx to 0 */
while ((val = (int)strtol (lnp, &p, 10)) != 0 && lnp != p) /* read next number */
{
sum += val; /* add val to sum */
lnp = ++p; /* set lnp to next char */
idx++; /* inc idx */
}
expavg[eidx++] = (idx > 0) ? sum / idx : 0; /* calc avg for experiment */
}
fclose (txt_file);
/* print the averages of experiments */
n = 0;
printf ("\n Experiment Avg\n");
printf (" -----------------------\n");
while (expname[n])
{
printf (" %-18s %d\n", expname[n], expavg[n]);
n++;
}
printf ("\n");
/* free all allocated memory */
n = 0;
if (line)
free (line);
while (expname[n])
free (expname[n++]);
return 0;
}
output:
$ ./bin/csvavgfixed dat/csvavg.dat
Experiment Avg
-----------------------
Bob's experiment 14
Mary's experiment 16
memory allocation/free summary:
==22148== HEAP SUMMARY:
==22148== in use at exit: 0 bytes in 0 blocks
==22148== total heap usage: 4 allocs, 4 frees, 723 bytes allocated
==22148==
==22148== All heap blocks were freed -- no leaks are possible
==22148==
==22148== For counts of detected and suppressed errors, rerun with: -v
==22148== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
I think this would do what you want.
#include <stdio.h>
int main(int argc, char* argv[])
{
int n = 0, count = 0, t = 0;
if(argc == 2) {
FILE *txt_file;
txt_file=fopen(argv[1], "rt");
char str[4096];
if(!txt_file) {
printf("File does not exist.\n");
return 1;
}
while(!feof(txt_file)){
char s;
s = fgetc(txt_file);
//prints experiment name
if(s != ',' && (!isdigit(s))) {
if(n!=0) {
printf("%d\n", n / count);
n = 0;
count = 0;
}
printf("%c", s);
}
if(s == ',') {
fscanf(txt_file, "%d", &t);
n+=t;
count++;
}
}
printf("%d\n", n / count);
fclose(txt_file);
return 0;
}
}