Reading a text file into a character array in c - c

I'm writing a program that uses the command-line arguments to receive the name of a text file from the user. The text file is a very simple CSV file such as:
Bob's experiment,12,14,15,16
Mary's experiment,16,15,18
I just want it to print the experiment name then the average of all the numerical values. I'm attempting to do this by putting all the numbers and commas into a char array and I don't know where I've gone wrong.
This is what I have:
/* Question's attempt: echo the experiment name of every line of the file named by argv[1]. */
int main(int argc, char *argv[])
{
/* the body only runs when exactly one argument (the file name) is given */
if(argc == 2) {
FILE *txt_file;
txt_file=fopen(argv[1], "rt");
char str[4096];
if(!txt_file) {
printf("File does not exist.\n");
return 1;
}
/* NOTE(review): feof() is tested before each read, so the failed final fgetc() result is still processed by the loop body */
while(!feof(txt_file)){
char s;
s = fgetc(txt_file);
//prints experiment name
if(s != ',' && (!isdigit(s))) {
printf("%c", s);
}
/* on the first comma or digit the remainder of the line is read into str; str is not used again in this snippet */
if(isdigit(s) || s == ',') {
fgets(str, 4096, txt_file);
}
}
fclose(txt_file);
return 0;
}

There are a number of ways to do this, but you should tailor your input routine to the type of data you are reading from your file. Here you are reading lines of data, so you should focus on line-oriented input routines (fgets, getline, or a shoehorned scanf). The basic approach is to read a line of input from your file into a buffer and then parse the line as needed. You can do this dynamically allocating all storage needed, or you can define a maximum value that should be large enough to handle your data.
Next you will need to parse the buffer read from the file to get the experiment name and each of the values associated so that an average can be calculated. Again, there are many ways to do this, but strtol is tailor-made for this purpose. It takes a pointer to the string to convert and returns an endptr to the next character that is not part of the number. This allows you to read a value and set pointer = endptr+1, which sets you up to read your next number.
I have put these pieces together in the example below. It is commented to help you follow along. Drop a comment if you have any additional questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXEXPS 256 /* maximum number of experiments (lines) kept in memory */
/*
 * Print the integer average of the values found on each line of a CSV file.
 *
 * Each line is expected to look like "name,v1,v2,...". argv[1] is the file
 * to read. Lines without a ',' are skipped, and at most MAXEXPS experiments
 * are stored. Returns 0 on success, 1 on a usage or file-open error.
 */
int main (int argc, char* argv[])
{
    if (argc < 2) {
        fprintf (stderr, "error: insufficient input. Usage %s <filename>\n", argv[0]);
        return 1;
    }

    char *line = NULL;              /* line buffer (allocated/grown by getline) */
    size_t n = 0;                   /* current capacity of line */
    char *expname[MAXEXPS] = {0};   /* experiment names (each allocated by strdup) */
    int expavg[MAXEXPS] = {0};      /* average for each experiment */
    int eidx = 0;                   /* number of experiments stored */

    FILE *txt_file = fopen (argv[1], "r");
    if (!txt_file) {
        fprintf (stderr, "error: unable to open file '%s'\n", argv[1]);
        return 1;
    }

    /* stop reading once the fixed-size arrays are full */
    while (eidx < MAXEXPS && getline (&line, &n, txt_file) != -1)
    {
        char *p = strchr (line, ',');   /* end of the experiment name */
        if (!p)                         /* no values on this line (e.g. blank line) */
            continue;
        *p++ = 0;                       /* terminate the name, step to first value */

        if (!(expname[eidx] = strdup (line))) {
            fprintf (stderr, "error: virtual memory exhausted.\n");
            break;
        }

        long sum = 0;                   /* running total for this line */
        int idx = 0;                    /* number of values converted */
        for (;;) {
            char *ep = NULL;
            long val = strtol (p, &ep, 10);
            if (ep == p)                /* nothing converted: end of the values */
                break;
            sum += val;                 /* a value of 0 is counted like any other */
            idx++;
            /* only step over a separator; never walk past the terminating NUL */
            p = (*ep == ',') ? ep + 1 : ep;
        }
        expavg[eidx++] = (idx > 0) ? (int)(sum / idx) : 0;
    }
    fclose (txt_file);

    /* print the averages of experiments */
    printf ("\n Experiment Avg\n");
    printf (" -----------------------\n");
    for (int i = 0; i < eidx; i++)
        printf (" %-18s %d\n", expname[i], expavg[i]);
    printf ("\n");

    /* free all allocated memory (free(NULL) is a no-op) */
    free (line);
    for (int i = 0; i < eidx; i++)
        free (expname[i]);
    return 0;
}
output:
$ ./bin/csvavgfixed dat/csvavg.dat
Experiment Avg
-----------------------
Bob's experiment 14
Mary's experiment 16
memory allocation/free summary:
==22148== HEAP SUMMARY:
==22148== in use at exit: 0 bytes in 0 blocks
==22148== total heap usage: 4 allocs, 4 frees, 723 bytes allocated
==22148==
==22148== All heap blocks were freed -- no leaks are possible
==22148==
==22148== For counts of detected and suppressed errors, rerun with: -v
==22148== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)

I think this would do what you want.
#include <stdio.h>
/*
 * Print each experiment name followed by the integer average of its values.
 *
 * The file named by argv[1] is read one character at a time: characters up
 * to the first ',' of a line are echoed as the name, and every ',' is
 * followed by one integer that is added to the running total. The average
 * is printed (after a space) when the line ends. Returns 0 on success and
 * 1 on a usage or file-open error.
 */
int main(int argc, char* argv[])
{
    if (argc != 2) {
        fprintf(stderr, "usage: %s <filename>\n", argv[0]);
        return 1;
    }

    FILE *txt_file = fopen(argv[1], "r");
    if (!txt_file) {
        printf("File does not exist.\n");
        return 1;
    }

    int sum = 0;    /* total of the values on the current line */
    int count = 0;  /* number of values on the current line */
    int c;          /* int, not char, so EOF can be told apart from data */

    while ((c = fgetc(txt_file)) != EOF) {
        if (c == ',') {
            int t;
            if (fscanf(txt_file, "%d", &t) == 1) {  /* ignore empty fields */
                sum += t;
                count++;
            }
        } else if (c == '\n') {
            if (count > 0)              /* never divide by zero */
                printf(" %d", sum / count);
            putchar('\n');
            sum = 0;
            count = 0;
        } else {
            putchar(c);                 /* part of the experiment name */
        }
    }
    if (count > 0)                      /* last line had no trailing newline */
        printf(" %d\n", sum / count);

    fclose(txt_file);
    return 0;
}

Related

Going Through multiple columns of a csv file in C

I want to write a program, that reads a very large csv file. I want the file to read the columns by name and then print the entirety of the column. However it only prints out one of the columns in the datalist. So it only prints out the unix timestamp columns out of the entirety of the program. I want the code to be able to print out the other columns as well Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
csv file:
Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume BTC,Volume USD
1605139200.0,2020-11-12,BTCUSD,15710.87,15731.73,15705.58,15710.01,1.655,26014.29
1605052800.0,2020-11-11,BTCUSD,15318,16000,15293.42,15710.87,1727.17,27111049.25
1604966400.0,2020-11-10,BTCUSD,15348.2,15479.49,15100,15318,1600.04,24521694.72
1604880000.0,2020-11-09,BTCUSD,15484.55,15850,14818,15348.2,2440.85,37356362.78
1604793600.0,2020-11-08,BTCUSD,14845.5,15672.1,14715.98,15484.55,987.72,15035324.13
Current code:
#include<stdio.h>
#include<stdlib.h>
/* Question's attempt: print the number at the start of each line read from filename.csv. */
void main()
{
char buffer[1001]; //get line
float timestampfile;
FILE *fp;
int i=1; //line
fp = fopen("filename.csv", "r"); //used to read csv
if(!fp)
{
printf("file not found"); //file not found
exit(0);
}
fgets(buffer,1000, fp); //read line
printf("Expected output print the first column:\n");
/* keep going until a previous read has hit end-of-file */
while(feof(fp) == 0)
{
/* "%f" converts only the number at the start of buffer, so nothing after the first comma is ever looked at */
sscanf(buffer,"%f",&timestampfile); //read data line
printf("%d: %f\n",i,timestampfile); //used to print data
i++;
/* fetch the next line for the following iteration */
fgets(buffer, 1000, fp);
}
printf("end of the column");
fclose(fp);
}
Current output:
1: 1605139200.000000
2: 1605052800.000000
3: 1604966400.000000
4: 1604880000.000000
5: 1604793600.000000
end of the column
You have started out in the right direction, but you have stumbled a bit in handling separating the comma separated values. The standard C library provides all you need to handle separating the values.
Simple Implementation Using strtok()
The easiest implementation would be to take the filename to read and the index of the column to extract as the first two arguments to your program. Then you could simply discard the heading row and output the requested value for the column index. That could be done with a simple loop that keeps track of the token number while calling strtok(). Recall that on the first call to strtok() the string itself is passed as the first parameter; every successive call passes NULL as the first argument until no more tokens are found.
A short example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
#define DELIM ",\n"
/*
 * Print one column of a CSV file.
 *
 * argv[1] is the file name and argv[2] the zero-based column index. The
 * heading row is discarded and the requested field of every following row
 * is written to stdout. Returns 0 on success, 1 on any error (bad usage,
 * bad index, unreadable or empty file, row with too few fields).
 */
int main (int argc, char **argv) {
    if (argc < 3) {     /* validate filename and column given as arguments */
        fprintf (stderr, "usage: %s filename column\n", argv[0]);
        return 1;
    }

    char buf[MAXC];                                 /* buffer to hold line */
    char *end = NULL;
    size_t ndx = strtoul (argv[2], &end, 0);        /* column index to retrieve */
    if (end == argv[2] || *end != '\0') {           /* reject non-numeric index */
        fputs ("error: invalid index\n", stderr);
        return 1;
    }

    FILE *fp = fopen (argv[1], "r");                /* file pointer */
    if (!fp) {          /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    int status = 0;     /* becomes 1 when a row cannot supply the column */

    if (!fgets (buf, MAXC, fp)) {   /* read / discard headings row */
        fputs ("error: empty file.\n", stderr);
        status = 1;
    }
    else {
        while (fgets (buf, MAXC, fp)) {     /* read / validate each line */
            char *p = buf;
            size_t i = 0;

            /* loop until the ndx token found */
            for (p = strtok (p, DELIM); p && i < ndx; p = strtok (NULL, DELIM))
                i++;

            if (i == ndx && p) {    /* validate token found */
                puts (p);
            }
            else {                  /* handle error */
                fputs ("error: invalid index\n", stderr);
                status = 1;
                break;
            }
        }
    }

    fclose (fp);        /* release the stream on every path */
    return status;
}
(note: strtok() considers multiple delimiters as a single delimiter. It cannot be used when empty fields are a possibility such as field1,field2,,field4,.... strsep() was suggested as a replacement for strtok() and it does handle empty-fields, but has shortcomings of its own.)
Example Use/Output
first column (index 0):
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 0
1605139200.0
1605052800.0
1604966400.0
1604880000.0
1604793600.0
second column (index 1)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 1
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
third column (index 2)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 2
BTCUSD
BTCUSD
BTCUSD
BTCUSD
BTCUSD
fourth column (index 3)
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 3
15710.87
15318
15348.2
15484.55
14845.5
request out of range:
$ ./bin/readcsvbycol_strtok dat/largecsv.csv 9
error: invalid index
More Involved Example Displaying Headings as Menu
If you wanted to provide a short interface for the user to choose which column to output, you could count the columns available. You can determine the number of commas present (and adding one more provides the number of columns). You can then save the headings to allow the user to select which column to output by allocating column number of pointers and then by allocating storage for each heading and copying the heading to the storage. You can then display the headings as a menu for the user to select from.
After determining which column to print, you simply read each line into your buffer, and then tokenize the line with either strtok() or strcspn() (the downside to strtok() is that it modifies the buffer, so if you need to preserve it, make a copy). strcspn() returns the length of the token, so it provides the advantage of not modifying the original and providing the number of characters in the token. Then you can output the column value and repeat until you run out of lines.
An example would be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
/*
 * Show the headings of a CSV file as a menu and print the chosen column.
 *
 * The CSV is read from the file named by argv[1] (stdin when no argument
 * is given); the selection is always read from stdin. Rows that have fewer
 * fields than the selected column are skipped, as are empty fields.
 * Returns 0 on success or when the user cancels input, 1 on error.
 */
int main (int argc, char **argv) {
    char buf[MAXC], *p = buf, **headings = NULL;
    size_t cols = 1, ndx = 0, nchr;
    int status = 1;                 /* assume failure until the end is reached */

    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    if (!fgets (buf, MAXC, fp)) {   /* read / validate headings row */
        fputs ("error: empty file.\n", stderr);
        goto done;
    }

    for (p = buf; (p = strchr (p, ',')); p++)   /* loop counting ',' */
        cols++;
    p = buf;                                    /* reset p to start of buf */

    /* allocate cols zeroed pointers so cleanup can free them unconditionally */
    if (!(headings = calloc (cols, sizeof *headings))) {
        perror ("malloc-heading pointers");
        goto done;
    }

    /* loop separating headings, allocate/assign storage for each, copy to storage */
    while (ndx < cols && *p && *p != '\n' && (nchr = strcspn (p, ",\n"))) {
        if (!(headings[ndx] = malloc (nchr + 1))) {     /* allocate/validate */
            perror ("malloc headings[ndx]");
            goto done;
        }
        memcpy (headings[ndx], p, nchr);                /* copy to storage */
        headings[ndx++][nchr] = 0;                      /* nul-terminate */
        p += nchr;
        if (*p == ',')      /* step over the separator only, never past the NUL */
            p++;
    }

    if (ndx != cols) {  /* validate ndx equals cols */
        fputs ("error: mismatched cols & ndx\n", stderr);
        goto done;
    }

    puts ("\nAvailable Columns:");  /* display available columns */
    for (size_t i = 0; i < cols; i++)
        printf (" %2zu) %s\n", i, headings[i]);

    ndx = cols;                     /* out of range until a valid choice is made */
    while (ndx >= cols) {           /* get / validate selection */
        fputs ("\nSelection: ", stdout);
        if (!fgets (buf, MAXC, stdin)) {    /* read input (same buffer) */
            puts ("(user canceled input)");
            status = 0;
            goto done;
        }
        if (sscanf (buf, "%zu", &ndx) != 1 || ndx >= cols) {   /* convert/validate */
            fputs (" error: invalid index.\n", stderr);
            ndx = cols;
        }
    }

    printf ("\n%s values:\n", headings[ndx]);   /* display column name */

    while (fgets (buf, MAXC, fp)) {             /* loop displaying column */
        p = buf;

        /* skip forward ndx ','; p becomes NULL when the row is too short */
        for (size_t col = 0; col < ndx && p; col++) {
            p = strchr (p, ',');
            if (p)
                p++;
        }
        if (!p)
            continue;

        /* output the field without modifying buf */
        if ((nchr = strcspn (p, ",\n")))
            printf ("%.*s\n", (int)nchr, p);
    }
    status = 0;

done:
    if (fp != stdin)    /* close file if not stdin */
        fclose (fp);
    if (headings)
        for (size_t i = 0; i < cols; i++)   /* free all allocated memory */
            free (headings[i]);
    free (headings);
    return status;
}
Example Use/Output
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 1
Date values:
2020-11-12
2020-11-11
2020-11-10
2020-11-09
2020-11-08
Or the open values:
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 3
Open values:
15710.87
15318
15348.2
15484.55
14845.5
Column out of range canceling input with Ctrl + d (Ctrl + z on windows):
$ ./bin/readcsvbycol dat/largecsv.csv
Available Columns:
0) Unix Timestamp
1) Date
2) Symbol
3) Open
4) High
5) Low
6) Close
7) Volume BTC
8) Volume USD
Selection: 9
error: invalid index.
Selection: (user canceled input)
Both approaches accomplish the same thing, it all depends on your program needs. Look things over and let me know if you have further questions.
In order to extract more than one field by name, you must get the names of the fields to extract, for example as command line arguments, determine the corresponding columns, and for each line of the CSV file, output the requested columns.
Below is a simple program that extracts columns from a CSV file and produces another CSV file. It does not use strtok() nor strchr() but analyses the line one character at a time to find the starting and ending offset of the columns and acts accordingly. The source file is passed as redirected input and the output can be redirected to a different CSV file.
Here is the code:
#include <stdio.h>
#include <string.h>
/*
 * Locate the field called `name` in a comma-separated header `line`.
 *
 * Fields end at ',', '\n' or the terminating NUL. Returns the zero-based
 * index of the first field whose text equals `name` exactly, or -1 when
 * the end of the line is reached without a match.
 */
int find_header(const char *line, const char *name) {
    const int want = strlen(name);  /* length a matching field must have */
    int column = 0;                 /* index of the field being scanned */
    int begin = 0;                  /* offset where that field starts */
    int pos = 0;                    /* current offset in line */

    while (1) {
        const char ch = line[pos];
        const int at_comma = (ch == ',');

        if (at_comma || ch == '\n' || ch == '\0') {
            const int width = pos - begin;

            if (width == want && memcmp(line + begin, name, want) == 0)
                return column;
            if (!at_comma)          /* end of line: no more fields to try */
                return -1;
            begin = pos + 1;
            column++;
        }
        pos++;
    }
}
/*
 * csvcut: copy selected columns of a CSV stream from stdin to stdout.
 *
 * Each command-line argument is a column name looked up in the header
 * line. Names that are not found are reported on stderr and dropped. The
 * remaining columns are written, in argument order, for the header and for
 * every record. Returns 2 on missing arguments, 1 on a missing header
 * line, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    char buffer[1002];
    int field[argc];        /* column index of each selected field */
    char *name[argc];       /* name of each selected field */
    int picked = 0;         /* number of fields actually selected */

    if (argc < 2) {
        printf("usage: csvcut FIELD1 [FIELD2 ...] < CSVFILE\n");
        return 2;
    }

    // read the input header line
    if (fgets(buffer, sizeof buffer, stdin) == NULL) {
        fprintf(stderr, "missing header line\n");
        return 1;
    }

    // determine which columns to extract
    for (int arg = 1; arg < argc; arg++) {
        int column = find_header(buffer, argv[arg]);
        if (column >= 0) {
            name[picked] = argv[arg];
            field[picked] = column;
            picked++;
        } else {
            fprintf(stderr, "field not found: %s\n", argv[arg]);
        }
    }

    // output new header line
    for (int k = 0; k < picked; k++) {
        if (k != 0)
            putchar(',');
        printf("%s", name[k]);
    }
    putchar('\n');

    // parse the records, output the selected fields
    while (fgets(buffer, sizeof buffer, stdin) != NULL) {
        for (int k = 0; k < picked; k++) {
            int begin = 0, width = 0;   /* span to print; stays empty if the field is absent */
            int field_start = 0;        /* offset where the current field starts */
            int current = 0;            /* index of the current field */

            if (k != 0)
                putchar(',');

            // find field boundaries
            for (int pos = 0;; pos++) {
                char ch = buffer[pos];
                if (ch != ',' && ch != '\n' && ch != '\0')
                    continue;
                if (current == field[k]) {
                    begin = field_start;
                    width = pos - field_start;
                    break;
                }
                if (ch != ',')          /* record ended before the wanted field */
                    break;
                field_start = pos + 1;
                current++;
            }
            printf("%.*s", width, buffer + begin);
        }
        putchar('\n');
    }
    return 0;
}
Sample run:
./csvcut Date Close < sample.csv
Date,Close
2020-11-12,15710.01
2020-11-11,15710.87
2020-11-10,15318
2020-11-09,15348.2
2020-11-08,15484.55
Note that fields cannot contain embedded commas. The program could be extended to handle quoted contents to support these.

How to sum up numbers from each lines in file in c?

I need to sum up the numbers from each line in the file like this e.g.:
1 2 3
10 -1 -3
and the result I should write to another file in each line likes this:
6
6
And I have the problem when in each line after the last number in reading file have more spaces, for example, maybe I use the '_' to show this problem:
When my function works:
10_11_12 '\n'
1_2_3 '\n'
and when my function doesn't work:
10_11_12_ _ _ '\n'
1_2_3 '\n'
I think I know where is the problem, but I have no idea how to fix it.
It's my function here:
/* Question's fragment: add up the integers of each line of `file`, writing each total to `res_file`. */
int num=0;
char s;
/* read integers one by one until a conversion fails */
while(fscanf(file, "%d", &num)==1){
/* take the single character that directly follows the number */
fscanf(file, "%c", &s);
sum+=num;
/* the total is written only when that one character is a newline */
if(s=='\n'){
fprintf(res_file, "%d\n", sum);
sum=0;
}
}
The problem is that fscanf is expecting a pointer to a char. Within your function, you are using a regular char, s.
char s;
You can fix your issue by making s a pointer. First, Allocate memory.
char *s = malloc(sizeof(char) + 1);
Now we can properly scan into the variable, s, and then check for the newline character. The only difference here is now we check for the newline by dereferencing s.
if (*s == '\n')
Don't forget to clean up the memory leak with free()!
free(s);
I was able to get the desired output using the code below.
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the sum of the integers on each line of a text file.
 *
 * Opens `filename`, adds up the whitespace-separated integers of every
 * line and prints one total per line to stdout. Blanks between the last
 * number and the newline are ignored, and a final line without a newline
 * is still reported. Reading stops at the first token that is not an
 * integer.
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 */
int processInputFile(char *filename)
{
    FILE *ifp = fopen(filename, "r");
    if (ifp == NULL)
    {
        fprintf(stderr, "Failed to open \"%s \" in processInputFile.\n", filename);
        return -1;
    }

    int value = 0;      /* number just read */
    int sum = 0;        /* running total of the current line */
    int pending = 0;    /* non-zero while sum holds an unprinted total */

    while (fscanf(ifp, "%d", &value) == 1)
    {
        int c;

        sum += value;
        pending = 1;

        /* skip blanks after the number so "12   \n" still ends the line */
        do {
            c = fgetc(ifp);
        } while (c == ' ' || c == '\t' || c == '\r');

        if (c == '\n')
        {
            printf("%d\n", sum);
            sum = 0;
            pending = 0;
        }
        else if (c != EOF)
        {
            ungetc(c, ifp);     /* start of the next number: give it back */
        }
    }

    if (pending)                /* last line had no trailing newline */
        printf("%d\n", sum);

    fclose(ifp);
    return 0;
}
/*
 * Entry point: sum the lines of the file named by the first argument.
 * Returns 0 after processing (the helper's result is not inspected) and
 * -1 when no file name was supplied.
 */
int main(int argc, char **argv)
{
    if (argc >= 2)
    {
        processInputFile(argv[1]);
        return 0;
    }

    fputs("Proper syntax: ./a.out <n>\n", stdout);
    return -1;
}
Any kind of line-by-line processing in C is easier done by reading the line first, and then processing it. fgets(3) handles end-of-line for you; then you just need to scan what it read. Plus, in the real world, some lines won't scan: either they'll have errors, or your scan won't be general enough. When that happens, it's awfully handy to write the input to standard error, so you can see what you're looking at.
Here's a complete program that does what you want. It assumes lines are less than 80 bytes long and doesn't protect against invalid input, though.
#include <stdio.h>
#include <err.h>
/*
 * Print the sum of the numbers on each line of "sum.dat".
 *
 * Every line is read with fgets into an 80-byte buffer and scanned with
 * sscanf; "%n" reports how many characters each conversion consumed so the
 * scan can resume right after it. Lines are numbered from 0.
 */
int main( int argc, char *argv[] ) {
    static const char *filename = "sum.dat";
    char line[80];
    FILE *input = fopen(filename, "r");

    if( input == NULL ) {
        err(1, "could not open %s", filename);
    }

    int nlines = 0;
    while( fgets(line, sizeof(line), input) != NULL ) {
        double sum = 0;
        const char *cursor = line;

        for( ;; ) {
            double value;
            int used;

            /* stop at the first position where no number can be converted */
            if( sscanf(cursor, "%lf%n", &value, &used) <= 0 ) {
                break;
            }
            sum += value;
            cursor += used;
        }
        printf( "line %d: sum = %lf\n", nlines, sum );
        nlines++;
    }
    return 0;
}
Reading with a line-oriented input function like fgets() or POSIX getline() ensures that a complete line of input is consumed on each call. (don't skimp on buffer size). strtol was created to convert an unknown number of values per-line into long. You walk-a-pointer down your buffer by utilizing the endptr parameter filled by strtol after a successful conversion to point to the next character after the last digit converted.
This allows a simple method to use a pair of pointers, p your start-pointer and ep your end-pointer to work through an entire line converting values as you go. The basic approach is to call strtol, validate it succeeded, and then set p = ep; to advance to the start of your next conversion. strtol ignores leading whitespace.
Putting it all together, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
                  /* (don't skimp on buffer-size) */
/*
 * Print the sum of the integers on each non-empty line of the input.
 *
 * Input comes from the file named by argv[1] (stdin by default). Values
 * are converted with strtol (base 0). Blanks before the newline are
 * skipped, so trailing spaces are not reported as an error. On a
 * conversion error, or when the total would leave the range of int, a
 * message goes to stderr and the total accumulated so far is printed.
 */
int main (int argc, char **argv) {

    char buf[MAXC];     /* buffer to hold each line read */
    size_t n = 0;       /* line-counter */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {     /* read each line */
        char *p = buf, *ep = p;         /* pointer and end-pointer */
        int sum = 0;                    /* variable to hold sum */

        if (*buf == '\n')               /* ignore empty lines */
            continue;

        for (;;) {
            /* skip blanks so trailing whitespace does not look like a bad value */
            while (*p == ' ' || *p == '\t' || *p == '\r')
                p++;
            if (!*p || *p == '\n')      /* end of line reached */
                break;

            errno = 0;
            long tmp = strtol (p, &ep, 0);  /* convert to temp long */

            if (p == ep) {      /* validate digits were converted */
                fputs ("error: no digits extracted.\n", stderr);
                break;
            }
            else if (errno) {   /* validate no under/overflow occurred */
                fputs ("error: underflow/overflow occurred.\n", stderr);
                break;
            }
            else if (tmp < INT_MIN || INT_MAX < tmp) {  /* validate in range */
                fputs ("error: tmp exceeds range of int.\n", stderr);
                break;
            }
            else if ((tmp > 0 && sum > INT_MAX - tmp) ||
                     (tmp < 0 && sum < INT_MIN - tmp)) {    /* sum would overflow */
                fputs ("error: sum exceeds range of int.\n", stderr);
                break;
            }
            sum += (int)tmp;    /* add tmp to sum */
            p = ep;             /* set p to end-ptr (one past last digit used) */
        }
        n++;    /* advance line counter */
        printf ("sum line [%2zu] : %d\n", n, sum);  /* output sum */
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}
(note: the if (*buf == '\n') check tests whether the first character in the line is a newline character and simply skips to the next line, so there is no need to worry about converting values in an empty line)
Example Use/Output
Using your data in dat/sumlines.txt produces the expected results.
$ ./bin/sumline dat/sumlines.txt
sum line [ 1] : 6
sum line [ 2] : 6
Let me know if you have further questions.

Reading double from a file with system call read

/* Question's fragment: fill a[] straight from file descriptor fd. */
double a[5];
for(int i = 0; i < 5; ++i){
/* read() copies up to sizeof(double) raw bytes into a[i]; it performs no text-to-number conversion */
read(fd, &a[i], sizeof(double));
}
When I print the content of the array, it shows me only zeros. How can I read double numbers from a text file without using fscanf?
File.txt
2.00 5.11 6.90 3.4 8.7
If I read char by char until the end of line, everything is fine.
As suggested by others if you don't want to use fscanf() then probably you should read all data from file using read() and store into char buffer and parse upto whitespace and then use strtod() to convert resultant string into double.
Here is a partial solution to help you get started (it is not a complete one):
/* headers needed by this snippet (the original listing omitted them) */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Read every double stored as text in "input.txt" using only the
 * open/lseek/read system calls, then convert with strtod().
 *
 * The whole file is read into one NUL-terminated buffer. strtod() skips
 * leading whitespace and reports where each conversion stopped, so the
 * buffer is walked from number to number until no conversion is possible.
 * Returns 0 on success, 1 on any I/O or allocation failure.
 */
int main() {
    /* read-only access; permission bits belong in open()'s third argument, not in the flags */
    int fd = open("input.txt", O_RDONLY);
    if (fd == -1) {
        perror("open");
        return 1;
    }

    /* first find the size of file, then go back to the beginning */
    off_t size = lseek(fd, 0, SEEK_END);
    if (size == -1 || lseek(fd, 0, SEEK_SET) == -1) {
        perror("lseek");
        close(fd);
        return 1;
    }
    printf("size of file : %ld \n", (long)size);

    /* take buffer equal to size of file, plus one byte for the terminator */
    char *buf = malloc((size_t)size + 1);
    if (buf == NULL) {
        perror("malloc");
        close(fd);
        return 1;
    }

    /* read() may return fewer bytes than requested, so loop until done */
    size_t total = 0;
    while (total < (size_t)size) {
        ssize_t got = read(fd, buf + total, (size_t)size - total);
        if (got == -1) {
            perror("read");
            free(buf);
            close(fd);
            return 1;
        }
        if (got == 0)   /* end of file reached early */
            break;
        total += (size_t)got;
    }
    buf[total] = '\0';
    printf("%s\n", buf);

    /* now parse using strtod(): each call resumes where the last one stopped */
    char *p = buf;
    for (;;) {
        char *end = NULL;
        double res = strtod(p, &end);
        if (end == p)   /* nothing converted: end of data or non-numeric text */
            break;
        printf("%lf\n", res);
        p = end;
    }

    free(buf);
    close(fd);
    return 0;
}

C read entire line of file [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I am trying to program a tool in C. Part of this program is to use a text file and read it line by line, while storing all lines into an array to have it available for future use.
That's what I have so far:
/* Question's attempt: count the lines of file.txt, then store a copy of every line in an array. */
int main(){
/* the file is opened in append/update binary mode ("ab+") */
FILE *fp = fopen("file.txt", "ab+");
if (fp == NULL) {
printf("FILE ERROR");
return 1;
}
int lines = 0;
int ch = 0;
/* first pass: count newline characters */
while(!feof(fp)){
ch = fgetc(fp);
if(ch == '\n'){
lines++;
}
}
printf("%d\n", lines);
if (lines>0){
int i = 0;
int numProgs = 0;
char* programs[lines];
/* the line buffer is sized by the number of lines, not by the length of a line */
char line[lines];
FILE *file;
/* second pass reads through a second stream opened on the same file */
file = fopen("file.txt", "r");
while(fgets(line, sizeof(line), file) != NULL){
programs[i] = strdup(line);
i++;
numProgs++;
}
/* NOTE(review): sizeof(programs) is the array's size in bytes, not its element count */
for (int j= 0; j<sizeof(programs); j++){
printf("%s\n", programs[j]);
}
fclose(file);
fclose(fp);
return 0;
}
My problem is im getting this output:
6 (the number of lines in the file)
Segmentation fault
How can I read a complete line by line , without knowing how long the line is in the beginning. in PHP I can do that very easily, but how can I do that in C?
Thanks for any hint!
fix like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read every line of "file.txt" into an array of strings, then print them.
 *
 * A first pass counts the lines and finds the longest one; a second pass
 * reads each line into a buffer of exactly that size and stores a copy.
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main(void){
    FILE *fp = fopen("file.txt", "r");//!
    if (fp == NULL) {
        fprintf(stderr, "FILE ERROR\n");
        return 1;
    }

    int lines = 0;
    int ch = 0;
    int len = 0;//! length of line
    int max_len = 0;//! max length of line

    while((ch = fgetc(fp))!=EOF){//!
        ++len;
        if(ch == '\n'){
            if(max_len < len)
                max_len = len;
            ++lines;
            len = 0;
        }
    }
    if(len){
        /* a final line without '\n' still counts, and may be the longest:
           leaving it out would make fgets split it into several chunks */
        if(max_len < len)
            max_len = len;
        ++lines;
    }
    fprintf(stderr, "%d lines.\n", lines);

    if (lines > 0){
        int numProgs = 0;
        char **programs = malloc(lines * sizeof(*programs));
        char *line = malloc((size_t)max_len + 1);//!

        if (programs == NULL || line == NULL) {
            fprintf(stderr, "MEMORY ERROR\n");
            free(programs);
            free(line);
            fclose(fp);
            return 1;
        }

        rewind(fp);//!
        /* never store more entries than were counted in the first pass */
        while(numProgs < lines && fgets(line, max_len + 1, fp)) {
            char *copy = strdup(line);
            if (copy == NULL) {
                fprintf(stderr, "MEMORY ERROR\n");
                break;
            }
            programs[numProgs++] = copy;//!
        }
        for (int j= 0; j < numProgs; j++){//!
            printf("%s", programs[j]);//!
            free(programs[j]);//!
        }
        free(line);
        free(programs);
    }
    fclose(fp);

    return 0;
}
If you truly want to read an unknown number of characters from an unknown number of lines and store those lines in an array (or, actually, in an object created from a pointer-to-pointer-to-char), then you have a number of options. POSIX getline is a line oriented input function (like fgets) which will read a line of text from the give file each time it is called, and will allocate sufficient storage to hold the line regardless of the length. (as a bonus getline returns the actual number of characters read, eliminating a subsequent call to strlen if the length is needed)
getline eliminates the need for repeated checks on whether fgets actually read the whole line, or just a partial. Further, if your lines are more than a few characters long, the buffered read provided by getline (and fgets) is quite a bit faster than character oriented input (e.g. fgetc). Don't get me wrong, there is nothing wrong with fgetc, and if your files are small and your lines short, you are not going to notice any difference. However, if you are reading a million lines of 500,000 chars each -- you will notice a significant difference.
As for an array, since you don't know how many lines you will read, you really need a pointer-to-pointer-to-char (e.g. a double-pointer, char **array) so you can allocate some reasonable number of pointers to begin with, allocate and assign the lines to individual pointers until your limit is reached, then realloc array to increase the number of pointers available, and keep on reading/storing lines.
As with any code that dynamically allocates memory, you must (1) preserve a pointer to each block of memory allocated, so (2) the memory can be freed when no longer in use. You should also validate each allocation (and reallocation) to ensure the allocations succeed. When using realloc, always use a temporary pointer so you can validate that realloc succeeds before assigning the new block to the original pointer. If you don't, and realloc fails, you have lost the pointer to your original block of memory, which is left untouched and not freed, and you have just created a memory leak.
Lastly, always verify your memory use with a memory error check program such as valgrind on Linux. There are a number of subtle ways to misuse a block of memory.
Putting all that together, you could do something like the following. The code will read all lines from the filename provided as the first argument (or from stdin if no filename is given):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXA = 128 }; /* initial allocation size, MAXA must be >= 1 */
/*
 * Read all lines from the file named by argv[1] (stdin by default) into a
 * dynamically grown array of strings, print them, then free everything.
 *
 * The pointer array starts with MAXA entries and grows by MAXA whenever
 * it fills up. Trailing newlines are stripped from each stored line.
 * Returns 0 on success, 1 if the file cannot be opened or the initial
 * allocation fails.
 */
int main (int argc, char **argv) {

    char *line = NULL;      /* line buffer (allocated/grown by getline) */
    char **arr = NULL;      /* array of pointers to the stored lines */
    size_t i, maxa = MAXA, n = 0, ndx = 0;
    ssize_t nchr = 0;       /* number of characters read by getline */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* allocate MAXA pointers to char -- initially & validate */
    if (!(arr = calloc (maxa, sizeof *arr))) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        if (fp != stdin) fclose (fp);
        return 1;
    }

    while ((nchr = getline (&line, &n, fp)) != -1) {    /* read each line */
        /* remove '\n'; the nchr > 0 guard keeps line[-1] from being read
           once a line consisting only of newlines has been emptied */
        while (nchr > 0 && line[nchr-1] == '\n')
            line[--nchr] = 0;

        if (!(arr[ndx] = strdup (line))) {  /* allocate, copy, add to arr */
            fprintf (stderr, "error: virtual memory exhausted.\n");
            break;  /* leave read loop, preserving existing arr */
        }

        if (++ndx == maxa) {    /* if allocation limit reached, realloc arr */
            size_t asz = sizeof *arr;
            void *tmp = realloc (arr, (maxa + MAXA) * asz);
            if (!tmp) {         /* validate realloc succeeded */
                fprintf (stderr, "error: realloc, memory exhausted.\n");
                break;          /* preserving original arr */
            }
            arr = tmp;          /* assign & zero (optional) new memory */
            /* arr + maxa is the first new pointer: pointer arithmetic already
               scales by the element size, so only the byte count uses asz */
            memset (arr + maxa, 0, MAXA * asz);
            maxa += MAXA;       /* update current allocation limit */
        }
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin */
    free (line);                    /* free mem allocated by getline */

    for (i = 0; i < ndx; i++)       /* output array */
        printf (" arr[%4zu] : %s\n", i, arr[i]);

    for (i = 0; i < ndx; i++)       /* free allocated memory */
        free (arr[i]);              /* free each line */
    free (arr);                     /* free pointers */

    return 0;
}
Example Use/Output
$ ./bin/getline_realloc_arr < dat/words_554.txt
arr[ 0] : Aam
arr[ 1] : Aard-vark
arr[ 2] : Aard-wolf
arr[ 3] : Aaronic
...
arr[ 549] : Accompaniment
arr[ 550] : Accompanist
arr[ 551] : Accompany
arr[ 552] : Accompletive
arr[ 553] : Accomplice
Look things over and let me know if you have any questions.
Try Online
#include <stdio.h>
#include <stdlib.h>
/*
 * Read the next line from `file` into a newly allocated string.
 *
 * Characters are read up to and including the next '\n'; the line also
 * ends at a NUL byte or at end of file. The stream is read once, front to
 * back, so it does not have to be seekable (pipes and terminals work).
 *
 * Returns a NUL-terminated string the caller must free(), or NULL when
 * nothing could be read (end of file) or memory could not be allocated.
 */
char * readLine (FILE * file)
{
    size_t cap = 0;     /* bytes allocated for out */
    size_t len = 0;     /* bytes stored so far */
    char * out = NULL;

    for (;;) {
        int c = fgetc(file);    /* int so EOF is distinguishable from data */

        if (c == EOF) {
            if (len == 0)       /* nothing read at all: no more lines */
                return NULL;
            break;              /* final line without a trailing newline */
        }

        /* keep room for this character plus the terminating '\0' */
        if (len + 1 >= cap) {
            size_t ncap = cap ? cap * 2 : 64;
            char * tmp = realloc(out, ncap);
            if (!tmp) {
                free(out);
                return NULL;
            }
            out = tmp;
            cap = ncap;
        }

        out[len++] = (char)c;
        if (c == '\n' || c == '\0')
            break;
    }

    // make sure there's \0 at the end
    out[len] = '\0';
    return out;
}
/*
 * Echo stdin to stdout one line at a time using readLine().
 * Each line is freed as soon as it has been printed.
 */
int main (void)
{
    // FILE * file = fopen("test.txt", "r");
    char * line = readLine(stdin);
    while(line)
    {
        /* fputs, not printf(line): input text must never be used as a format string */
        fputs(line, stdout);    // print current line
        free(line);             // free allocated memory
        line = readLine(stdin); // next line
    }
    return 0;
}
Read up on malloc / realloc and friends.
A first approach for reading a single line might be something along the lines of the following (note that this is a toy program, and as such omits error-checking):
size_t line_length = 0;
char *line = NULL;
int ch; /* int rather than char, so EOF can be told apart from real data */
/* stop at end of line OR end of file; without the EOF test the loop never ends on a file with no final newline */
while ((ch = fgetc(fp)) != '\n' && ch != EOF) {
    /* toy code: realloc results are deliberately left unchecked here */
    line = realloc(line, line_length+1);
    line[line_length++] = (char)ch;
}
// Add null character at end of line
line = realloc(line, line_length+1);
line[line_length] = 0;
The biggest problem with this is that it's slow, and especially slow for long lines. A better approach would be to keep track of the allocated and written size, and exponentially-grow size of the array as necessary, and then trim to the actual required length at the end.
Also, it'd probably be better (and simpler) to use fgets for that approach.
For reading multiple lines, you can nest this approach.

How to sort lines of file depending on value at end of each line

I'm trying to create a program that takes an input file and sorts it to a new output file in ascending order depending on the number at the end of each line. For example, if the input file contains three lines below:
a good man 50
65
better are 7
The corresponding sorted output file would be three lines but sorted:
better are 7
a good man 50
65
Code I have so far:
/*
 * Read the lines of inputFileName and echo them to standard output.
 *
 * At most MAX_LINES lines of up to MAX_LINE_LEN - 1 characters are stored;
 * any further lines are ignored so the buffer cannot be overrun.
 * outputFileName is opened (and therefore created/truncated) but nothing is
 * written to it yet -- the sorting step is still to be added.
 *
 * Exits the process with status -1 if either file cannot be opened;
 * otherwise returns 0.
 */
int sortLines(char * inputFileName, char * outputFileName)
{
    enum { MAX_LINES = 10, MAX_LINE_LEN = 1024 };

    FILE *fpin = fopen(inputFileName, "r"); /* open file to read */
    if (!fpin)
    {
        printf("Error in file opening\n");
        exit (-1);
    }
    FILE *fpout = fopen(outputFileName, "w"); /* open file to write */
    if (!fpout)
    {
        printf("Error in opfile opening\n");
        fclose(fpin);
        exit (-1);
    }

    char file[MAX_LINES][MAX_LINE_LEN];
    int total = 0;
    /* Test the bound first so fgets() is never handed a row past the array. */
    while (total < MAX_LINES && fgets(file[total], sizeof(file[total]), fpin))
        total++;

    for (int i = 0; i < total; ++i)
        printf("%s", file[i]);

    fclose(fpout);
    fclose(fpin);
    return 0;
}
Continuing on from the comment, you can read the lines into a struct (containing the line and an int), then use strrchr to find the last space in each line (or if null, just take the whole line), convert the string with strtol or atoi or the like to set the int field of struct. Then it is a simple matter of sorting the structs based on the int member. I'll leave the reading into the struct to you, the sorting example is:
#include <stdio.h>
#include <stdlib.h>
#define MAXL 32
/* One line of text paired with the integer used as its sort key. */
struct data {
    char line[MAXL];    /* text of the line */
    int n;              /* sort key */
};
/*
 * qsort comparison function ordering struct data elements by ascending n.
 * Returns a negative, zero, or positive value when a sorts before, equal
 * to, or after b.
 */
int compare_n (const void *a, const void *b)
{
    const struct data *ia = a;
    const struct data *ib = b;
    /* Compare instead of subtracting: ia->n - ib->n overflows (undefined
     * behavior) when the keys are far apart, e.g. large values of
     * opposite sign. */
    return (ia->n > ib->n) - (ia->n < ib->n);
}
int main (void)
{
struct data lines[] = {{"a good man 50", 50}, {"65", 65}, {"better are 7", 7}};
size_t nstr = sizeof lines / sizeof *lines;
size_t i = 0;
qsort (lines, nstr, sizeof *lines, compare_n);
for (i = 0; i < nstr; i++)
printf (" %s\n", lines[i].line);
return 0;
}
Output
$ ./bin/struct_sort_int
better are 7
a good man 50
65
full example
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXL 64
/* simple struct holding char array and int */
struct data {
    char line[MAXL];    /* text of the line */
    int n;              /* sort key */
};
/*
 * qsort comparison function for int 'n': orders struct data elements by
 * ascending n. Returns a negative, zero, or positive value when a sorts
 * before, equal to, or after b.
 */
int compare_n (const void *a, const void *b)
{
    const struct data *ia = a;
    const struct data *ib = b;
    /* Compare instead of subtracting: ia->n - ib->n overflows (undefined
     * behavior) when the keys are far apart, e.g. large values of
     * opposite sign. */
    return (ia->n > ib->n) - (ia->n < ib->n);
}
/*
 * Read the file named by argv[1], store every non-blank line that fits in
 * struct data.line together with the integer found after its last space
 * (or the whole line converted, when it contains no space), sort the
 * stored entries with compare_n and print the lines in that order.
 *
 * At most MAXL lines are stored; lines longer than MAXL - 1 characters are
 * skipped with a warning. Returns 0 on success, 1 on a usage or file-open
 * error.
 */
int main (int argc, char **argv)
{
    if (argc < 2 ) { /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }
    struct data lines[MAXL] = {{{0}, 0}}; /* array of struct */
    char *ln = NULL; /* buffer for getline, getline allocates */
    size_t n = 0; /* initial size of buf, 0 getline decides */
    ssize_t nchr = 0; /* getline return, no. of chars read */
    size_t idx = 0; /* number of entries stored in lines */
    size_t i = 0; /* general iteration variable */
    FILE *fp = NULL; /* file pointer for input file */
    if (!(fp = fopen (argv[1], "r"))) { /* validate file open */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }
    /* read each line in file */
    while ((nchr = getline (&ln, &n, fp)) != -1)
    {
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0; /* strip newline or carriage rtn */
        if (!nchr) continue; /* skip blank lines */
        if (nchr > MAXL - 1) { /* line does not fit in struct.line */
            fprintf (stderr,
                     "warning: line exceeds %d chars, skipped.\n", MAXL - 1);
            continue; /* a truncated copy would lose the number at end */
        }
        strcpy (lines[idx].line, ln); /* copy to struct.line */
        char *p = strrchr (ln, ' '); /* pointer to last space */
        if (!p)
            p = ln; /* if no space, then whole line */
        lines[idx].n = atoi (p); /* convert string to int */
        idx++; /* increment index */
        if (idx == MAXL) { /* if MAXL read, break */
            fprintf (stderr, "warning: %d lines read.\n", MAXL);
            break;
        }
    }
    fclose (fp); /* fp is known to be non-NULL here */
    free (ln); /* free(NULL) is a no-op, no guard needed */
    qsort (lines, idx, sizeof *lines, compare_n); /* sort struct */
    for (i = 0; i < idx; i++) /* print sorted array */
        printf (" %s\n", lines[i].line);
    return 0;
}
Take a look and let me know if you have questions. Your data was in the file dat/endno.txt for my test. I'll add comments when I get a chance.
note: updated to skip blank lines and to check line length against MAXL to eliminate the possibility of a write beyond end of lines and skip lines that would be truncated rendering the number at end invalid.
without struct statically allocated arrays
The following is an example that uses two 2D arrays, one for the lines and then one holding the original line index and number at end of line. Unlike the dynamically allocated example below, this example is limited to reading MAXL lines from the file, each of no more than MAXS characters. If a line is exactly MAXS characters long (including the null-terminator), it must be discarded, because there is no way of knowing if the number at end remains valid. The 2D array containing the line index and number at end is sorted based on the number at end, then lines are printed based on the original line index resulting in the lines printing in sorted order by number at end. While this may look simpler, it is far more limited than the method utilizing the struct or the dynamically allocated approach below. This is about all I can think to do to get you going. Good luck. Drop a line if you have questions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXL 64
#define MAXS 128
int cmpint (const void *a, const void *b);
/*
 * Read the file named by argv[1] into a fixed table of at most MAXL lines,
 * record for each stored line the integer after its last space (or the
 * whole line converted, when it has no space) and its table index, sort
 * those pairs with cmpint and print the lines in the resulting order.
 *
 * Blank lines are skipped. A line that does not fit in the MAXS-byte
 * buffer is discarded in full, since its trailing number would be lost.
 * Returns 0 on success, 1 on a usage or file-open error.
 */
int main (int argc, char **argv) {
    if (argc < 2 ) { /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }
    int numidx[MAXL][2] = {{0}}; /* {number at end, line index} pairs */
    char lines[MAXL][MAXS] = {{0}}; /* array of strings */
    char ln[MAXS] = {0}; /* buffer for fgets, MAXS in length */
    size_t nchr = 0; /* length of the string in ln */
    size_t idx = 0; /* number of lines stored */
    size_t i = 0; /* general iteration variable */
    FILE *fp = NULL; /* file pointer for input file */
    if (!(fp = fopen (argv[1], "r"))) { /* validate file open */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }
    /* read each line in file */
    while (fgets (ln, MAXS, fp) != NULL)
    {
        nchr = strlen (ln); /* get length of ln */
        /* A completely filled buffer that does not end in '\n' means fgets
         * stopped before the end of the line. Discard what was read and
         * consume the remainder so it is not parsed as a separate line. */
        if (nchr == MAXS - 1 && ln[nchr-1] != '\n') {
            int c;
            while ((c = fgetc (fp)) != '\n' && c != EOF)
                ;
            continue;
        }
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0; /* strip newline or carriage rtn */
        if (!nchr) /* skip blank lines */
            continue;
        strcpy (lines[idx], ln); /* copy ln to lines[idx] */
        char *p = NULL;
        if (!(p = strrchr (ln, ' '))) /* pointer to last space */
            p = ln; /* if no space, then line */
        numidx[idx][0] = atoi (p); /* save end no. in array */
        numidx[idx][1] = (int)idx; /* save line index in array */
        idx++; /* increment index */
        if (idx == MAXL) { /* if MAXL read, break */
            fprintf (stderr, "warning: %d lines read.\n", MAXL);
            break;
        }
    }
    fclose (fp);
    qsort (numidx, idx, sizeof (int) * 2, cmpint);/* sort array */
    for (i = 0; i < idx; i++) /* print sorted array */
        printf (" %s\n", lines[numidx[i][1]]);
    return 0;
}
/*
 * qsort comparator for rows of int: only the first int of each row takes
 * part in the comparison. Yields -1, 0 or 1 for less, equal, greater.
 */
int cmpint (const void *pa, const void *pb )
{
    const int lhs = *(const int *)pa;
    const int rhs = *(const int *)pb;

    /* difference of two 0/1 flags: never overflows, always -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
without struct, dynamically allocated arrays
To get around using a structure to hold the string and number, you can use 2 arrays. One to hold the strings, and another 2D array holding the original line index and number at end of line (2 integers). You then qsort the integer array on the (number at end) element, then loop through each line printing out the lines in sorted order based on the line index value of the sorted array. This is set to handle lines of any length and reallocate the number of lines (in each array) as needed. Since the dynamic allocation may be a bit much, I'm working on a static array version as well, but it will be tomorrow before I have time. Here is the first version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXL 64
int cmpint (const void *a, const void *b);
char **realloc_char (char **sp, size_t *n);
int **realloc_int (int **ip, size_t *n);
/*
 * Read the file named by argv[1], storing a heap copy of every non-blank
 * line in `lines` and, in `numidx`, a 2-int record holding the integer
 * after the line's last space (or the whole line converted, when it has no
 * space) and the line's index. The records are sorted with cmpint and the
 * lines printed in that order. Both pointer arrays start at MAXL entries
 * and are doubled whenever they fill up.
 *
 * Returns 0 on success, 1 on a usage, file-open or allocation error.
 */
int main (int argc, char **argv) {
    if (argc < 2 ) { /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }
    int **numidx = NULL; /* array of pointers to 2-int records */
    char **lines = NULL; /* array of pointers to line copies */
    char *ln = NULL; /* buffer for getline, getline allocates */
    size_t n = 0; /* initial size of buf, 0 getline decides */
    ssize_t nchr = 0; /* getline return, no. of chars read */
    size_t idx = 0; /* number of lines stored */
    size_t i = 0; /* general iteration variable */
    size_t maxl = MAXL; /* holds current allocation size of arrays */
    FILE *fp = NULL; /* file pointer for input file */
    if (!(fp = fopen (argv[1], "r"))) { /* validate file open */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }
    /* allocate MAXL pointers to int* */
    if (!(numidx = calloc (MAXL, sizeof *numidx))) {
        fprintf (stderr, "error: memory allocation failed.\n");
        return 1;
    }
    /* allocate MAXL pointers to char* */
    if (!(lines = calloc (MAXL, sizeof *lines))) {
        fprintf (stderr, "error: memory allocation failed.\n");
        return 1;
    }
    /* read each line in file */
    while ((nchr = getline (&ln, &n, fp)) != -1)
    {
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0; /* strip newline or carriage rtn */
        if (!nchr) continue; /* skip blank lines */
        /* copy ln to lines[idx]; the copy is printed later, so a failed
         * allocation must not be stored silently */
        if (!(lines[idx] = strdup (ln))) {
            fprintf (stderr, "error: memory allocation failed.\n");
            return 1;
        }
        /* allocate space for 2 int at numidx[idx] */
        if (!(numidx[idx] = calloc (2, sizeof **numidx))) {
            fprintf (stderr, "error: memory allocation failed.\n");
            return 1;
        }
        char *p = NULL;
        if (!(p = strrchr (ln, ' '))) /* pointer to last space */
            p = ln; /* if no space, then line */
        numidx[idx][0] = atoi (p); /* save end no. in array */
        numidx[idx][1] = (int)idx; /* save line index in array */
        idx++; /* increment index */
        if (idx == maxl) { /* if idx = maxl reallocate */
            size_t tsz = maxl; /* tmp var, each get maxl */
            numidx = realloc_int (numidx, &tsz);
            lines = realloc_char (lines, &maxl);
        }
    }
    free (ln); /* free(NULL) is a no-op, no guard needed */
    fclose (fp);
    qsort (numidx, idx, sizeof *numidx, cmpint); /* sort records by end no. */
    for (i = 0; i < idx; i++) /* print sorted array */
        printf (" %s\n", lines[numidx[i][1]]);
    for (i = 0; i < idx; i++) { /* free allocated memory */
        free (numidx[i]);
        free (lines[i]);
    }
    free (numidx);
    free (lines);
    return 0;
}
/*
 * qsort integer compare function for an array of int pointers.
 * a and b each point at an element of that array (an int *); the records
 * are ordered by ascending value of the first int each pointer refers to.
 * Returns a negative, zero, or positive value accordingly.
 */
int cmpint (const void *a, const void *b)
{
    const int *ia = *(const int * const *)a;
    const int *ib = *(const int * const *)b;
    /* Compare instead of subtracting: ia[0] - ib[0] overflows (undefined
     * behavior) when the two values are far apart. */
    return (ia[0] > ib[0]) - (ia[0] < ib[0]);
}
/** double the capacity of an array of string pointers.
 * sp is reallocated to hold twice *n pointers, the newly added upper
 * half is filled with zero bytes so it can be iterated safely, and *n
 * is updated to the new capacity. The process exits with EXIT_FAILURE
 * if the reallocation fails. Returns the (possibly moved) array.
 */
char **realloc_char (char **sp, size_t *n)
{
    const size_t old_count = *n;
    char **grown = realloc (sp, 2 * old_count * sizeof *sp);

    if (grown == NULL) {
        fprintf (stderr, "Error: struct reallocation failure.\n");
        exit (EXIT_FAILURE);
    }

    /* only the slots beyond the old capacity are cleared */
    memset (grown + old_count, 0, old_count * sizeof *grown);
    *n = old_count * 2;
    return grown;
}
/** realloc an array of pointers to int setting new memory to 0.
 * Doubles the capacity of ip from *n to 2 * *n pointers, fills the newly
 * added upper half with zero bytes to allow iteration, and stores the new
 * capacity in *n. The process exits with EXIT_FAILURE if the reallocation
 * fails. Returns the (possibly moved) array.
 */
int **realloc_int (int **ip, size_t *n)
{
    /* Size the request by the element type alone (sizeof *ip), exactly as
     * realloc_char does; an extra multiplier only wastes memory. */
    int **tmp = realloc (ip, 2 * *n * sizeof *ip);
    if (!tmp) {
        fprintf (stderr, "Error: struct reallocation failure.\n");
        exit (EXIT_FAILURE);
    }
    ip = tmp;
    memset (ip + *n, 0, *n * sizeof *ip); /* memset new ptrs 0 */
    *n *= 2;
    return ip;
}
You could read the entire file into a single buffer, create an array of structures containing pointers to lines and the values at the end of each line (scan for newline characters), then sort the array of structures by the values, and output the data according to the pointers in the sorted array of structures.

Resources