CSV File Input in C using Structures - c

I want to print the data from a .csv file line by line, where the fields are separated by a comma delimiter.
This code prints garbage values.
/* Gender code of a student; M has the value 0 and F the value 1. */
enum gender{ M, F };
/* One record of the marks CSV, in the field order read by main(). */
struct student{
int stud_no; /* first CSV field */
/* second CSV field; NOTE(review): main() fills this through a %c conversion,
   which stores a single char rather than an enum value */
enum gender stud_gen;
char stud_name[100]; /* third CSV field: room for 99 characters plus the terminator */
int stud_marks; /* fourth CSV field */
};
void main()
{
struct student s[60];
int i=0,j,roll_no,marks,k,select;
FILE *input;
FILE *output;
struct student temp;
input=fopen("Internal test 1 Marks MCA SEM 1 oct 2014 - CS 101.csv","r");
output=fopen("out.txt","a");
if (input == NULL) {
printf("Error opening file...!!!");
}
while(fscanf(input,"%d,%c,%100[^,],%d", &s[i].stud_no,&s[i].stud_gen,&s[i].stud_name,&s[i].stud_marks)!=EOF)
{
printf("\n%d,%c,%s,%d", s[i].stud_no,s[i].stud_gen,s[i].stud_name,s[i].stud_marks);
i++;
}
}
I also tried the code from: Read .CSV file in C But it prints only the nth field. I want to display all fields line by line.
Here is my sample input.
1401,F,FERNANDES SUZANNA ,13
1402,M,PARSEKAR VIPUL VILAS,14
1403,M,SEQUEIRA CLAYTON DIOGO,8
1404,M,FERNANDES GLENN ,17
1405,F,CHANDRAVARKAR TANUSHREE ROHIT,15

While there are a number of ways to parse any line into components, one way that can really increase understanding is to use a start and end pointer to work down each line identifying the commas, replacing them with null-terminators (i.e. '\0' or just 0), reading the field, restoring the comma and moving to the next field. This is just a manual application of strtok. The following example does that so you can see what is going on. You can, of course, replace use of the start and end pointers (sp & p, respectively) with strtok.
Read through the code and let me know if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* maximum number of student to initially allocate */
#define MAXS 256
/* Gender code: M is 0 and F is 1; main() prints 'F' for any non-zero value. */
enum gender { M, F };
typedef struct { /* create typedef to struct */
int stud_no; /* field 0 of a line, converted with atoi */
enum gender stud_gen; /* field 1: 0 when it starts with 'M', otherwise 1 */
char *stud_name; /* field 2: copy made by strdup, released with free */
int stud_marks; /* text after the last comma, converted with atoi */
} student;
int main (int argc, char *argv[]) {
if (argc < 2) {
printf ("filename.csv please...\n");
return 1;
}
char *line = NULL; /* pointer to use with getline () */
ssize_t read = 0; /* characters read by getline () */
size_t n = 0; /* number of bytes to allocate */
student **students = NULL; /* ptr to array of stuct student */
char *sp = NULL; /* start pointer for parsing line */
char *p = NULL; /* end pointer to use parsing line */
int field = 0; /* counter for field in line */
int cnt = 0; /* counter for number allocated */
int it = 0; /* simple iterator variable */
FILE *fp;
fp = fopen (argv[1], "r"); /* open file , read only */
if (!fp) {
fprintf (stderr, "failed to open file for reading\n");
return 1;
}
students = calloc (MAXS, sizeof (*students)); /* allocate 256 ptrs set to NULL */
/* read each line in input file preserving 1 pointer as sentinel NULL */
while (cnt < MAXS-1 && (read = getline (&line, &n, fp)) != -1) {
sp = p = line; /* set start ptr and ptr to beginning of line */
field = 0; /* set/reset field to 0 */
students[cnt] = malloc (sizeof (**students)); /* alloc each stuct with malloc */
while (*p) /* for each character in line */
{
if (*p == ',') /* if ',' end of field found */
{
*p = 0; /* set as null-term char (temp) */
if (field == 0) students[cnt]->stud_no = atoi (sp);
if (field == 1) {
if (*sp == 'M') {
students[cnt]->stud_gen = 0;
} else {
students[cnt]->stud_gen = 1;
}
}
if (field == 2) students[cnt]->stud_name = strdup (sp); /* strdup allocates for you */
*p = ','; /* replace with original ',' */
sp = p + 1; /* set new start ptr start pos */
field++; /* update field count */
}
p++; /* increment pointer p */
}
students[cnt]->stud_marks = atoi (sp); /* read stud_marks (sp alread set to begin) */
cnt++; /* increment students count */
}
fclose (fp); /* close file stream */
if (line) /* free memory allocated by getline */
free (line);
/* iterate over all students and print */
printf ("\nThe students in the class are:\n\n");
while (students[it])
{
printf (" %d %c %-30s %d\n",
students[it]->stud_no, (students[it]->stud_gen) ? 'F' : 'M', students[it]->stud_name, students[it]->stud_marks);
it++;
}
printf ("\n");
/* free memory allocated to struct */
it = 0;
while (students[it])
{
if (students[it]->stud_name)
free (students[it]->stud_name);
free (students[it]);
it++;
}
if (students)
free (students);
return 0;
}
(note: added condition on loop that cnt < MAXS-1 to preserve at least one pointer in students NULL as a sentinel allowing iteration.)
input:
$ cat dat/people.dat
1401,F,FERNANDES SUZANNA ,13
1402,M,PARSEKAR VIPUL VILAS,14
1403,M,SEQUEIRA CLAYTON DIOGO,8
1404,M,FERNANDES GLENN ,17
1405,F,CHANDRAVARKAR TANUSHREE ROHIT,15
output:
$./bin/stud_struct dat/people.dat
The students in the class are:
1401 F FERNANDES SUZANNA 13
1402 M PARSEKAR VIPUL VILAS 14
1403 M SEQUEIRA CLAYTON DIOGO 8
1404 M FERNANDES GLENN 17
1405 F CHANDRAVARKAR TANUSHREE ROHIT 15
valgrind memcheck:
I have updated the code slightly to ensure that all allocated memory is freed, guarding against memory leaks. Simple things like the automatic allocation of memory for line by getline, or failing to close a file stream, can result in small memory leaks. Below is the valgrind memcheck confirmation.
valgrind ./bin/stud_struct dat/people.dat
==11780== Memcheck, a memory error detector
==11780== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==11780== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==11780== Command: ./bin/stud_struct dat/people.dat
==11780==
The students in the class are:
1401 F FERNANDES SUZANNA 13
1402 M PARSEKAR VIPUL VILAS 14
1403 M SEQUEIRA CLAYTON DIOGO 8
1404 M FERNANDES GLENN 17
1405 F CHANDRAVARKAR TANUSHREE ROHIT 15
==11780==
==11780== HEAP SUMMARY:
==11780== in use at exit: 0 bytes in 0 blocks
==11780== total heap usage: 13 allocs, 13 frees, 2,966 bytes allocated
==11780==
==11780== All heap blocks were freed -- no leaks are possible
==11780==
==11780== For counts of detected and suppressed errors, rerun with: -v
==11780== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)

Related

Parse CSV into dynamically allocated array of structures (ANSI 89)

I am trying to parse a csv into a dynamically allocated array of structures, however my attempt crashes with a segmentation fault.
Here is the structure of my data:
SO02773202,5087001,0
SO02773203,5087001,0
SO02773204,5087001,0
SO02773205,5087001,0
SO02773206,5087001,14
This is the struct I am parsing the data into:
/* One parsed line of the saleslines file (three comma-separated fields). */
typedef struct saleslines{
char* salesid; /* first field; read_saleslines stores a strdup copy here */
char* smmcampaignid; /* second field; read_saleslines stores a strdup copy here */
int numberofbottles; /* third field, converted with atoi */
} saleslines_t;
Here is my attempt at parsing the file:
/*
 * Count the newlines in FILENAME, allocate an array of saleslines_t, then
 * read the file again and fill the array, printing every record.
 *
 * Returns number_of_lines (the incoming value plus the newlines counted),
 * or 0 if the first fopen fails.
 *
 * NOTE(review): saleslines is received by value, so the malloc result
 * assigned below only changes the local copy; the caller's pointer is left
 * untouched and the array cannot be reached after the function returns.
 */
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
/* NOTE(review): getc returns int; storing it in a char makes the EOF test unreliable */
char c;
FILE* fp;
fp = fopen(FILENAME, "r"); /* Open the saleslines file */
if(fp == NULL){ /* report the failure and return 0 */
printf("Error - file not found\n");
return 0;
}
/* first pass: add one to number_of_lines for every '\n' in the file */
c = getc(fp);
while (c != EOF){
if (c == '\n'){
number_of_lines += 1;
}
c = getc(fp);
}
printf("Number of lines is %d\n", number_of_lines);
/* room for twice the counted lines; NOTE(review): the result is not checked for NULL */
saleslines = (saleslines_t*) malloc((number_of_lines * 2) * sizeof(saleslines_t));
/* allocation of the buffer for every line in the File */
char *buf = (char*) malloc(1000);
char *tmp;
/* NOTE(review): the stream from the first fopen is still open when fp is
   overwritten here, and a failed reopen only prints a message before fp is used */
if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
{
printf( "File could not be opened.\n" );
}
int i = 0;
/* second pass: one record per line (fgets is limited to 254 characters per call) */
while (fgets(buf, 255, fp) != NULL){
/* strip the trailing newline, if any */
if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
buf[strlen (buf) - 1] = '\0';
/* NOTE(review): none of the strtok results is checked for NULL before use */
tmp = strtok(buf, ",");
saleslines[i].salesid = strdup(tmp);
tmp = strtok(NULL, ",");
saleslines[i].smmcampaignid = strdup(tmp);
tmp = strtok(NULL, ",");
saleslines[i].numberofbottles = atoi(tmp);
printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i].salesid , saleslines[i].smmcampaignid, saleslines[i].numberofbottles);
i++;
}
free(buf);
fclose(fp);
printf("Number of lines is %i\n", number_of_lines);
return number_of_lines;
}
For some reason it parses the file and prints the resulting array of structs, however when I call this function immediately after, it crashes with a segfault:
/*
 * Print the first number_of_lines records of saleslines to stdout.
 *
 * saleslines       array of at least number_of_lines records (may be NULL,
 *                  in which case only the line count is printed)
 * number_of_lines  number of records to print
 */
void print_saleslines_struct(saleslines_t* saleslines, int number_of_lines){
    int i;

    printf("Number of lines is %i", number_of_lines);
    if (saleslines == NULL)     /* nothing has been read yet */
        return;

    for(i = 0; i < number_of_lines; i++){
        printf("Salesid:\t %s\n", saleslines[i].salesid);
        printf("Campaign:\t %s\n", saleslines[i].smmcampaignid);
        printf("# of Bottles:\t %d\n", saleslines[i].numberofbottles);
    }
}
I can't seem to find where this memory bug is.
Here is the initialization and main:
saleslines_t* saleslines;
saleslines_summary_t* saleslines_summary;
saleslines_grouped_t* saleslines_grouped;
int number_of_lines = 0;
int* number_of_linesp = &number_of_lines;

/*
 * Menu loop: read, print or summarise the saleslines until option 4 is
 * chosen or standard input ends.
 *
 * NOTE(review): option 1 passes the saleslines pointer by value, so with the
 * read_saleslines(saleslines_t*, int) signature the global pointer is not
 * updated by the call.
 */
int main(){
    int chosen_option;

    while(1){
        int got;

        printf("What would you like to do?\n");
        printf("1. Read saleslines.txt\n");
        printf("2. Print saleslines\n");
        printf("3. Summarise saleslines\n");
        printf("4. Exit the program\n");

        got = scanf("%d", &chosen_option);
        if (got == EOF){
            chosen_option = 4;      /* no more input: leave through the exit path */
        } else if (got != 1){
            scanf("%*[^\n]");       /* drop the non-numeric line and ask again */
            continue;
        }

        switch(chosen_option){
        case 1 : number_of_lines = read_saleslines(saleslines, number_of_lines); break;
        case 2 : printf("Number of lines is %i", number_of_lines); print_saleslines_struct(saleslines, number_of_lines); break;
        case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;
        case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;
        default: break;             /* unknown option: show the menu again */
        }
    }
    return 0;
}
Update
The issue seems to be with my initialization of the array of structures.
When I initialize it like this: saleslines_t* saleslines;
and then malloc like this: saleslines = malloc(number_of_lines + 1 * sizeof(saleslines_t);
I get a segfault.
But if I initialize like this: saleslines[600]; (allocating more than the number of lines in the file), everything works.
How can I get around this? I would like to be able to dynamically allocate the number of entries within the struct array.
Edit 2
Here are the changes as suggested:
int read_saleslines(saleslines_t** saleslines, int number_of_lines);
saleslines_t* saleslines;
int number_of_lines = 0;

/*
 * Menu loop: read, print or summarise the saleslines until option 4 is
 * chosen or standard input ends. Option 1 passes the address of the global
 * pointer so that read_saleslines can store the allocated array in it.
 */
int main(){
    int chosen_option;              /* menu selection read from stdin */

    while(1){
        int got;

        printf("What would you like to do?\n");
        printf("1. Read saleslines.txt\n");
        printf("2. Print saleslines\n");
        printf("3. Summarise saleslines\n");
        printf("4. Exit the program\n");
        printf("Number of saleslines = %i\n", number_of_lines);

        got = scanf("%d", &chosen_option);
        if (got == EOF){
            chosen_option = 4;      /* no more input: leave through the exit path */
        } else if (got != 1){
            scanf("%*[^\n]");       /* drop the non-numeric line and ask again */
            continue;
        }

        switch(chosen_option){
        case 1 : number_of_lines = read_saleslines(&saleslines, number_of_lines); break;
        case 2 : printf("Number of lines is %i", number_of_lines); print_saleslines_struct(saleslines, number_of_lines); break;
        case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;
        case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;
        default: break;             /* unknown option: show the menu again */
        }
    }
    return 0;
}
/*
 * Read every "salesid,smmcampaignid,numberofbottles" line of FILENAME into a
 * newly allocated array and print each record as it is stored.
 *
 * saleslines       out: receives the allocated array; the caller frees the
 *                  two strings of every element and then the array itself.
 *                  It is only written when the allocation succeeds.
 * number_of_lines  ignored; the count always starts from zero so that
 *                  calling the function again does not accumulate.
 *
 * Returns the number of records stored, or 0 if the file cannot be opened
 * or the array cannot be allocated.
 */
int read_saleslines(saleslines_t** saleslines, int number_of_lines)
{
    FILE* fp;
    saleslines_t *records;
    char buf[256];
    int c;                      /* int, so that EOF is distinguishable from data */
    int last = '\n';            /* last character seen in the first pass */
    int lines = 0;              /* lines counted in the first pass */
    int count = 0;              /* records actually stored */

    (void) number_of_lines;

    fp = fopen(FILENAME, "r");  /* Open the saleslines file */
    if(fp == NULL){
        printf("Error - file not found\n");
        return 0;
    }

    /* first pass: count lines, including a final line without '\n' */
    while ((c = getc(fp)) != EOF){
        if (c == '\n')
            lines += 1;
        last = c;
    }
    if (last != '\n')
        lines += 1;
    printf("Number of lines is %d\n", lines);

    records = malloc((lines + 1) * sizeof *records);
    if (records == NULL){
        perror("malloc");
        fclose(fp);
        return 0;
    }

    rewind(fp);                 /* second pass over the same stream */
    while (count < lines && fgets(buf, sizeof buf, fp) != NULL){
        char *id, *cid, *bottles;

        buf[strcspn(buf, "\n")] = '\0';         /* strip the newline */

        id = strtok(buf, ",");
        cid = strtok(NULL, ",");
        bottles = strtok(NULL, ",");
        if (id == NULL || cid == NULL || bottles == NULL)
            continue;                           /* blank or malformed line */

        records[count].salesid = strdup(id);
        records[count].smmcampaignid = strdup(cid);
        if (records[count].salesid == NULL || records[count].smmcampaignid == NULL){
            perror("strdup");
            free(records[count].salesid);
            free(records[count].smmcampaignid);
            break;                              /* keep what was stored so far */
        }
        records[count].numberofbottles = atoi(bottles);

        /* index the array itself: (*saleslines)[i], never saleslines[i]-> */
        printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", records[count].salesid , records[count].smmcampaignid, records[count].numberofbottles);
        count++;
    }
    fclose(fp);

    *saleslines = records;
    printf("Number of lines is %i\n", count);
    return count;
}
The program now segfaults after reading the first element in the struct array.
You have a problem with the arguments of read_saleslines(). The first argument should be a pointer to an array of your structs, meaning a double pointer.
In
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
you want to modify where saleslines is pointing. However, saleslines is a local variable of the function, and its scope is that function. Once you exit read_saleslines(), the variable is "destroyed", meaning that the value it holds is no longer accessible. By adding another level of indirection (a pointer to the pointer), you can modify the variable that is defined outside the function, whether that is an (ugly) global or something else. So, change that argument so that the function prototype matches
int read_saleslines(saleslines_t** saleslines, int *);
and change the places where you access it inside the function (adding an * to access it), for example:
saleslines = (saleslines_t*) malloc((number_of_lines * ...
to
*saleslines = (saleslines_t*) malloc((number_of_lines * ...
and
saleslines[i].salesid = strdup(tmp);
to
(*saleslines)[i].salesid = strdup(tmp);
Then add an & where you use the variable outside the function:
number_of_lines = read_saleslines(saleslines, number_of_lines);
changes to
some_var = read_saleslines(&saleslines, &number_of_lines);
That will make your code work.
You have a large number of errors in your code, and with your approach in general. There is no need to make two-passes over the file to determine the number of lines before allocating and then re-reading the file in an attempt to parse the data. Further, there is no need to tokenize each line to separate the comma-separated-values, sscanf() to parse the two strings and one int is sufficient here after reading each line with fgets.
While you are free to pass any mix of parameters you like and return whatever you like, since you are allocating for an array of struct and reading values into the array, it makes sense to return a pointer to the allocated array from your function (or NULL on failure) and simply update a parameter passed as a pointer to make the total number of lines read available back in the caller.
Further, generally you want to open and validate the file in the caller and pass a FILE* parameter passing the open file stream to your function. With that in mind, you could refactor your function as:
/* read saleslines into array of saleslines_t, allocating for
* salesid, and smmcampaignid within each struct. Return pointer
* to allocated array on success with lines updated to hold the
* number of elements, or NULL otherwise.
*/
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
Within your function, you simply need a buffer to hold each line read, a counter to track the number of elements allocated in your array, and a pointer to your array to return. For example, you could do something like the following to handle all three:
char buf[MAXC]; /* buffer to hold line */
size_t maxlines = MINL; /* maxlines allocated */
saleslines_t *sales = NULL; /* pointer to array of struct */
(note: since you are tracking the number of lines read through the pointer lines passed as a parameter, it would make sense to initialize the value at that address to zero)
Now the work of your function begins: you want to read each line into buf and parse the needed information from each line. Since salesid and smmcampaignid are both pointers-to-char in your struct, you will need to allocate a block of memory for each string parsed from the line, copy the string to the new block of memory, and then assign the beginning address of the block to each of your pointers. To "dynamically" handle allocating elements for your struct, you simply check whether the number of lines filled (*lines) equals the number allocated (maxlines) (or whether *lines is zero, indicating a need for an initial allocation), and realloc in both cases to either grow (or newly allocate) storage for your array of struct.
When you realloc you always realloc using a temporary pointer so if realloc fails and returns NULL, you don't overwrite your pointer to the currently allocated block with NULL thereby creating a memory leak.
Putting all that together at the beginning of your function may seem daunting, but it is actually straight forward, e.g.
while (fgets (buf, MAXC, fp)) { /* read each line in file */
char id[MAXC], cid[MAXC]; /* temp arrays to hold strings */
int bottles; /* temp int for numberofbottles */
if (*lines == maxlines || !*lines) { /* check if realloc req'd */
/* always realloc with a temp pointer */
void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
if (!tmp) { /* if realloc fails, original pointer still valid */
perror ("realloc-sales"); /* throw error */
return sales; /* return current pointer */
} /* (don't exit or return NULL) */
sales = tmp; /* assign reallocated block to sales */
/* (optional) zero newly allocated memory */
memset (sales + *lines, 0, maxlines * sizeof *sales);
maxlines *= 2; /* update maxlines allocated */
}
Now you are ready to parse the wanted information from your line with sscanf, and then following a successful parse of information, you can allocate for each of your salesid and smmcampaignid pointers, copy the parsed information to the new blocks of memory assigning the beginning address to each pointer, respectively, e.g.
/* parse needed data from line (sscanf is fine here) */
if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
size_t idlen = strlen (id), /* get lengths of strings */
cidlen = strlen (cid);
sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
if (!sales[*lines].salesid) { /* validate! */
perror ("malloc-sales[*lines].salesid");
break;
}
sales[*lines].smmcampaignid = malloc (cidlen + 1); /* ditto */
if (!sales[*lines].smmcampaignid) {
perror ("malloc-sales[*lines].smmcampaignid");
break;
}
memcpy (sales[*lines].salesid, id, idlen + 1); /* copy strings */
memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
sales[(*lines)++].numberofbottles = bottles; /* assign int */
} /* (note lines counter updated in last assignment) */
(note: you can use strdup to both get the length of each string parsed and allocate sufficient memory to hold the string and assign that to your pointer in one-shot, e.g. sales[*lines].salesid = strdup (id);, but... strdup is not required to be included in C99 or later, so it is just as simple to get the length, allocate length + 1 bytes and then memcpy your string manually to ensure portability. Further, since strdup allocates memory, you must validate the pointer returned -- something overlooked by 99% of those using it.)
That's it, when fgets() fails, you have reached EOF, now simply:
return sales; /* return dynamically allocated array of struct */
}
Putting it all together in a short, working example that takes the filename to read as the first argument to your program (or reads from stdin by default if no argument is given), you could do:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* size of the line buffer, terminator included */
#define MINL 2    /* half of the initial number of elements allocated */

/* One parsed "salesid,smmcampaignid,numberofbottles" line. */
typedef struct saleslines{
    char *salesid;          /* first field, individually allocated */
    char *smmcampaignid;    /* second field, individually allocated */
    int numberofbottles;    /* third field */
} saleslines_t;

/* Read saleslines from fp into a dynamically grown array of saleslines_t,
 * allocating salesid and smmcampaignid within each struct.
 *
 * fp     open stream to read from; it is not closed here
 * lines  out: number of elements stored in the returned array
 *
 * Lines that do not hold two comma-terminated strings followed by an integer
 * are skipped. Returns a pointer to the allocated array, or NULL if nothing
 * could be allocated (e.g. empty input). On an allocation failure part-way
 * through, the elements stored so far are returned and *lines counts them.
 * The caller frees both strings of each element and then the array.
 */
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
    char buf[MAXC];                 /* buffer to hold line */
    size_t maxlines = MINL;         /* doubled at every (re)allocation */
    saleslines_t *sales = NULL;     /* pointer to array of struct */

    *lines = 0;                     /* zero lines */

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        char id[MAXC], cid[MAXC];   /* temp arrays to hold strings */
        int bottles;                /* temp int for numberofbottles */

        /* Grow only when there is no array yet or it is full. Testing
         * !sales instead of !*lines keeps leading unparsable lines (such
         * as a header) from doubling the array once per line.
         */
        if (!sales || *lines == maxlines) {
            /* always realloc with a temp pointer */
            void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
            if (!tmp) {     /* if realloc fails, original pointer still valid */
                perror ("realloc-sales");
                return sales;       /* return what has been read so far */
            }
            sales = tmp;            /* assign reallocated block to sales */
            /* zero every element that has not been filled yet */
            memset (sales + *lines, 0,
                    (2 * maxlines - *lines) * sizeof *sales);
            maxlines *= 2;          /* maxlines now equals the allocation */
        }

        /* parse needed data from line (sscanf is fine here) */
        if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
            size_t idlen = strlen (id),     /* get lengths of strings */
                   cidlen = strlen (cid);

            sales[*lines].salesid = malloc (idlen + 1);
            if (!sales[*lines].salesid) {   /* validate! */
                perror ("malloc-sales[*lines].salesid");
                break;
            }

            sales[*lines].smmcampaignid = malloc (cidlen + 1);
            if (!sales[*lines].smmcampaignid) {
                perror ("malloc-sales[*lines].smmcampaignid");
                /* this element is not counted in *lines, so the caller
                 * would never free its salesid: release it here
                 */
                free (sales[*lines].salesid);
                sales[*lines].salesid = NULL;
                break;
            }

            memcpy (sales[*lines].salesid, id, idlen + 1);  /* copy strings */
            memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
            sales[(*lines)++].numberofbottles = bottles;    /* count element */
        }
    }

    return sales;   /* return dynamically allocated array of struct */
}
/*
 * Read saleslines from the file named by the first argument (or from stdin
 * when no argument is given), print every record and release all memory.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv)
{
    FILE *in = stdin;               /* input stream, stdin by default */
    size_t count;                   /* number of records read */
    saleslines_t *records;          /* array returned by read_saleslines */

    if (argc > 1)
        in = fopen (argv[1], "r");

    if (in == NULL) {
        perror ("file open failed");
        return 1;
    }

    records = read_saleslines (in, &count);

    if (in != stdin)                /* only close what was opened here */
        fclose (in);

    for (size_t k = 0; k != count; ++k) {
        saleslines_t *rec = &records[k];

        printf ("sales[%2zu]: %s %s %2d\n", k, rec->salesid,
                rec->smmcampaignid, rec->numberofbottles);

        /* each record owns its two strings */
        free (rec->salesid);
        free (rec->smmcampaignid);
    }

    free (records);
    return 0;
}
Example Use/Output
$ ./bin/saleslines dat/saleslines.txt
sales[ 0]: SO02773202 5087001 0
sales[ 1]: SO02773203 5087001 0
sales[ 2]: SO02773204 5087001 0
sales[ 3]: SO02773205 5087001 0
sales[ 4]: SO02773206 5087001 14
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/saleslines dat/saleslines.txt
==19819== Memcheck, a memory error detector
==19819== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==19819== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==19819== Command: ./bin/saleslines dat/saleslines.txt
==19819==
sales[ 0]: SO02773202 5087001 0
sales[ 1]: SO02773203 5087001 0
sales[ 2]: SO02773204 5087001 0
sales[ 3]: SO02773205 5087001 0
sales[ 4]: SO02773206 5087001 14
==19819==
==19819== HEAP SUMMARY:
==19819== in use at exit: 0 bytes in 0 blocks
==19819== total heap usage: 13 allocs, 13 frees, 935 bytes allocated
==19819==
==19819== All heap blocks were freed -- no leaks are possible
==19819==
==19819== For counts of detected and suppressed errors, rerun with: -v
==19819== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
There is nothing difficult in dynamically allocating for anything. Just take it in small enough pieces that you dot all "I's" and cross all "T's" for each pointer requiring allocation. Look things over and let me know if you have further questions.

Reading text file using fgets() and strtok() to separate strings in line yielding unwanted behaviour

I am trying to read a text file with the following format, using fgets() and strtok().
1082018 1200 79 Meeting with President
2012018 1200 79 Meet with John at cinema
2082018 1400 30 games with Alpha
3022018 1200 79 sports
I need to separate the first value from the rest of the line, for example:
key=21122019, val = 1200 79 Meeting with President
To do so I am using strchr() for val and strtok() for key, however, the key value remains unchanged when reading from file. I can't understand why this is happening since I am allocating space for in_key inside the while loop and placing inside an array at a different index each time.
My code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 1000 // max number of lines to be read
#define VALLEN 100
#define MAXC 1024
#define ALLOCSIZE 1000 /* number of bytes in the pool */
static char allocbuf[ALLOCSIZE]; /* the pool that alloc hands out */
static char *allocp = allocbuf; /* first byte not yet handed out */

/*
 * Hand out the next n characters of the pool.
 * Returns a pointer to the start of the region, or 0 when fewer than n
 * characters remain. Nothing is ever returned to the pool by this function.
 */
char *alloc(int n) {
    char *region = allocp;

    if (allocbuf + ALLOCSIZE - allocp < n)
        return 0; /* request does not fit */

    allocp += n; /* reserve the region */
    return region;
}
int main(int argc, char** argv) {
FILE *inp_cal;
inp_cal = fopen("calendar.txt", "r+");
char buf[MAXC];
char *line[1024];
char *p_line;
char *in_val_arr[100];
char *in_key_arr[100];
int count = 0;
char delimiter[] = " ";
if (inp_cal) {
printf("Processing file...\n");
while (fgets(buf, MAXC, inp_cal)) {
p_line = malloc(strlen(buf) + 1); // malloced with size of buffer.
char *in_val;
char *in_key;
strcpy(p_line, buf); //used to create a copy of input buffer
line[count] = p_line;
/* separating the line based on the first space. The words after
* the delimeter will be copied into in_val */
char *copy = strchr(p_line, ' ');
if (copy) {
if ((in_val = alloc(strlen(line[count]) + 1)) == NULL) {
return -1;
} else {
strcpy(in_val, copy + 1);
printf("arr: %s", in_val);
in_val_arr[count] = in_val;
}
} else
printf("Could not find a space\n");
/* We now need to get the first word from the input buffer*/
if ((in_key = alloc(strlen(line[count]) + 1)) == NULL) {
return -1;
}
else {
in_key = strtok(buf, delimiter);
printf("%s\n", in_key);
in_key_arr[count] = in_key; // <-- Printed out well
count++;
}
}
for (int i = 0; i < count; ++i)
printf("key=%s, val = %s", in_key_arr[i], in_val_arr[i]); //<-- in_key_arr[i] contains same values throughout, unlike above
fclose(inp_cal);
}
return 0;
}
while-loop output (correct):
Processing file...
arr: 1200 79 Meeting with President
1082018
arr: 1200 79 Meet with John at cinema
2012018
arr: 1400 30 games with Alpha
2082018
arr: 1200 79 sports
3022018
for-loop output (incorrect):
key=21122019, val = 1200 79 Meeting with President
key=21122019, val = 1200 79 Meet with John
key=21122019, val = 1400 30 games with Alpha
key=21122019, val = 1200 79 sports
Any suggestions on how this can be improved and why this is happening? Thanks
Continuing from the comment: in attempting to use strtok to separate your data into key, val, somenum and the remainder of the line as a string, you are making things harder than they need to be.
If the beginning of your lines are always:
key val somenum rest
you can simply use sscanf to parse key, val and somenum into, e.g., three unsigned values and the rest of the line into a string. To help preserve the relationship between each key, val, somenum and string, storing the values from each line in a struct will greatly ease keeping track of everything. You can even allocate for the string to minimize storage to the exact amount required. For example, you could use something like the following:
typedef struct { /* struct to handle values */
unsigned key, val, n; /* the three unsigned numbers at the start of a line */
char *s; /* rest of the line as a string, allocated separately */
} keyval_t;
Then within main() you could allocate for some initial number of structs, keep an index as a counter, loop reading each line using a temporary struct and buffer, then allocate for the string (+1 for the nul-terminating character) and copy the values to your struct. When the number of structs filled reaches your allocated amount, simply realloc the number of structs and keep going.
For example, let's say you initially allocate for NSTRUCT structs and read each line into buf, e.g.
...
#define NSTRUCT 8 /* initial struct to allocate */
#define MAXC 1024 /* read buffer size (don't skimp) */
...
/* allocate/validate storage for max struct */
if (!(kv = malloc (max * sizeof *kv))) {
perror ("malloc-kv");
return 1;
}
...
size_t ndx = 0, /* used */
max = NSTRUCT; /* allocated */
keyval_t *kv = NULL; /* ptr to struct */
...
while (fgets (buf, MAXC, fp)) { /* read each line of input */
...
Within your while loop, you simply need to parse the values with sscanf, e.g.
char str[MAXC];
size_t len;
keyval_t tmp = {.key = 0}; /* temporary struct for parsing */
if (sscanf (buf, "%u %u %u %1023[^\n]", &tmp.key, &tmp.val, &tmp.n,
str) != 4) {
fprintf (stderr, "error: invalid format, line '%zu'.\n", ndx);
continue;
}
With the values parsed, you check whether your index has reached the number of struct you have allocated and realloc if required (note the use of a temporary pointer to realloc), e.g.
if (ndx == max) { /* check if realloc needed */
/* always realloc with temporary pointer */
void *kvtmp = realloc (kv, 2 * max * sizeof *kv);
if (!kvtmp) {
perror ("realloc-kv");
break; /* don't exit, kv memory still valid */
}
kv = kvtmp; /* assign new block to pointer */
max *= 2; /* increment max allocated */
}
Now with storage for the struct, simply get the length of the string, copy the unsigned values to your struct, and allocate length + 1 chars for kv[ndx].s and copy str to kv[ndx].s, e.g.
len = strlen(str); /* get length of str */
kv[ndx] = tmp; /* assign tmp values to kv[ndx] */
kv[ndx].s = malloc (len + 1); /* allocate block for str */
if (!kv[ndx].s) { /* validate */
perror ("malloc-kv[ndx].s");
break; /* ditto */
}
memcpy (kv[ndx++].s, str, len + 1); /* copy str to kv[ndx].s */
}
(note: you can use strdup if you have it to replace malloc through memcpy with kv[ndx].s = strdup (str);, but since strdup allocates, don't forget to check kv[ndx].s != NULL before incrementing ndx if you go that route)
That's pretty much the easy and robust way to capture your data. It is now contained in an allocated array of struct which you can use as needed, e.g.
for (size_t i = 0; i < ndx; i++) {
printf ("kv[%2zu] : %8u %4u %2u %s\n", i,
kv[i].key, kv[i].val, kv[i].n, kv[i].s);
free (kv[i].s); /* free string */
}
free (kv); /* free stucts */
(don't forget to free the memory you allocate)
Putting it all together, you could do something like the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NSTRUCT 8 /* initial struct to allocate */
#define MAXC 1024 /* read buffer size (don't skimp) */
typedef struct { /* struct to handle values */
unsigned key, val, n; /* filled from the three %u conversions in main() */
char *s; /* rest of the line; main() allocates it with malloc and frees it */
} keyval_t;
/*
 * Read "key val n text..." lines from the file named by the first argument
 * (or from stdin when no argument is given) into a dynamically grown array
 * of keyval_t, print every entry and release all memory.
 *
 * Lines that do not match the expected layout are reported on stderr with
 * their 1-based input line number and skipped. (A physical line longer than
 * MAXC-1 characters is read in several pieces, each counted as a line.)
 *
 * Returns 0 on success and 1 if the file cannot be opened or the initial
 * allocation fails.
 */
int main (int argc, char **argv) {

    char buf[MAXC];             /* line buffer */
    size_t ndx = 0,             /* structs used */
           max = NSTRUCT,       /* structs allocated */
           lineno = 0;          /* input lines read so far */
    keyval_t *kv = NULL;        /* ptr to struct */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("fopen-file");
        return 1;
    }

    /* allocate/validate storage for max struct */
    if (!(kv = malloc (max * sizeof *kv))) {
        perror ("malloc-kv");
        if (fp != stdin)        /* do not leak the stream on this path */
            fclose (fp);
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {     /* read each line of input */
        char str[MAXC];
        size_t len;
        keyval_t tmp = {.key = 0};      /* temporary struct for parsing */

        lineno++;

        if (sscanf (buf, "%u %u %u %1023[^\n]", &tmp.key, &tmp.val, &tmp.n,
                    str) != 4) {
            /* report the input line, not the number of records stored */
            fprintf (stderr, "error: invalid format, line '%zu'.\n", lineno);
            continue;
        }

        if (ndx == max) {   /* check if realloc needed */
            /* always realloc with temporary pointer */
            void *kvtmp = realloc (kv, 2 * max * sizeof *kv);
            if (!kvtmp) {
                perror ("realloc-kv");
                break;      /* don't exit, kv memory still valid */
            }
            kv = kvtmp;     /* assign new block to pointer */
            max *= 2;       /* increment max allocated */
        }

        len = strlen(str);              /* get length of str */
        kv[ndx] = tmp;                  /* assign tmp values to kv[ndx] */
        kv[ndx].s = malloc (len + 1);   /* allocate block for str */
        if (!kv[ndx].s) {               /* validate */
            perror ("malloc-kv[ndx].s");
            break;                      /* ditto */
        }
        memcpy (kv[ndx++].s, str, len + 1); /* copy str to kv[ndx].s */
    }

    if (fp != stdin)    /* close file if not stdin */
        fclose (fp);

    for (size_t i = 0; i < ndx; i++) {
        printf ("kv[%2zu] : %8u %4u %2u %s\n", i,
                kv[i].key, kv[i].val, kv[i].n, kv[i].s);
        free (kv[i].s);     /* free string */
    }
    free (kv);              /* free structs */

    return 0;
}
Example Use/Output
Using your data file as input, you would receive the following:
$ ./bin/fgets_sscanf_keyval <dat/keyval.txt
kv[ 0] : 1082018 1200 79 Meeting with President
kv[ 1] : 2012018 1200 79 Meet with John at cinema
kv[ 2] : 2082018 1400 30 games with Alpha
kv[ 3] : 3022018 1200 79 sports
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/fgets_sscanf_keyval <dat/keyval.txt
==6703== Memcheck, a memory error detector
==6703== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==6703== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==6703== Command: ./bin/fgets_sscanf_keyval
==6703==
kv[ 0] : 1082018 1200 79 Meeting with President
kv[ 1] : 2012018 1200 79 Meet with John at cinema
kv[ 2] : 2082018 1400 30 games with Alpha
kv[ 3] : 3022018 1200 79 sports
==6703==
==6703== HEAP SUMMARY:
==6703== in use at exit: 0 bytes in 0 blocks
==6703== total heap usage: 5 allocs, 5 frees, 264 bytes allocated
==6703==
==6703== All heap blocks were freed -- no leaks are possible
==6703==
==6703== For counts of detected and suppressed errors, rerun with: -v
==6703== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have any further questions. If you need to further split kv[i].s, then you can think about using strtok.
You are storing the same pointer in the in_key_arr over and over again.
You roughly need this:
in_key = strtok(buf, delimiter);
printf("%s\n", in_key);
char *newkey = malloc(strlen(in_key) + 1); // <<<< allocate new memory
strcpy(newkey, in_key);
in_key_arr[count] = newkey; // <<<< store newkey
count++;
Disclaimer:
no error checking is done for brevity
the malloced memory needs to be freed once you're done with it.
you are assigning an address with the call to alloc then reassigning with call to strtok? rewriting the same address? Copy return from strtok to in_key?
char *copy = strchr(p_line, ' ');
if (copy) {
if ((in_val = alloc(strlen(line[count]) + 1)) == NULL) {
return -1;
} else {
printf("arr: %ul\n", in_val);
strcpy(in_val, copy + 1);
printf("arr: %s", in_val);
in_val_arr[count] = in_val;
}
} else
printf("Could not find a space\n");
/* We now need to get the first word from the input buffer*/
if ((in_key = alloc(strlen(line[count]) + 1)) == NULL) {
return -1;
}
else {
printf("key: %ul\n", in_key);
in_key = strtok(buf, delimiter);
printf("key:\%ul %s\n",in_key, in_key);
in_key_arr[count++] = in_key; // <-- Printed out well
}
output:
allocbuf: 1433760064l
Processing file...
all: 1433760064l
arr: 1433760064l
arr: 1200 79 Meeting with President
all: 1433760104l
key: 1433760104l
key:4294956352l 1082018
this change fixed it:
strcpy(in_key, strtok(buf, delimiter));

Count the reocurrence of words in text file

Expanding on a previous exercise of mine, I have a text file that is filled with one word per line.
hello
hi
hello
bonjour
bonjour
hello
As I read these words from the file I would like to compare them to an array of struct pointers (created from the text file). If the word does not exist within the array, the word should be stored into a struct pointer with a count of 1. If the word already exists in the array, the count should increase by 1. I will write the outcome into a new file (that already exists).
hello = 3
hi = 1
bonjour = 2
this is my code
#include <stdio.h>
#include <stdlib.h>
/* A word together with a count of its occurrences. */
struct wordfreq{
int count; /* occurrence count */
char *word; /* pointer to the word's characters (the struct holds no storage for them) */
};
/*
 * Question code: attempts to count how often each word from the file named
 * by argv[1] occurs and to write "word = count" lines to the file named by
 * argv[2].
 * NOTE(review): the code is kept exactly as posted; the defects visible in
 * it are flagged inline.
 */
int main(int argc, char * argv[]) {
struct wordfreq *words[1000] = {NULL};
int i, j, f = 0;
/* allocates all 1000 structs up front; the malloc results are not checked */
for(i=0; i <1000; i++)
words[i] = (struct wordfreq*)malloc(sizeof(struct wordfreq));
/* NOTE(review): argc is never checked before argv[1] and argv[2] are used */
FILE *input = fopen(argv[1], "r");
FILE *output = fopen(argv[2], "w");
/* only input is validated; output is used below without a NULL check */
if(input == NULL){
printf("Error! Can't open file.\n");
exit(0);
}
char str[20];
i=0;
/* NOTE(review): %s carries no field width, so a word of 20 or more chars
 * overruns str; the "[^\n]" after %s is literal text to match, not a
 * scanset; &str has type char (*)[20] where %s expects char * */
while(fscanf(input, "%s[^\n]", &str) ==1){
//fprintf(output, "%s:\n", str);
for(j=0; j<i; j++){
//fprintf(output, "\t%s == %s\n", str, words[j] -> word);
/* NOTE(review): == compares the two addresses, not the characters */
if(str == words[j]->word){
words[j] ->count ++;
f = 1;
}
}
/* NOTE(review): f is never reset to 0, so once any match has set it no
 * further word is stored */
if(f==0){
/* NOTE(review): stores the address of the one buffer str, not a copy, so
 * every stored word points at whatever str currently holds */
words[i]->word = str;
words[i]->count = 1;
}
//fprintf(output, "\t%s = %d\n", words[i]->word, words[i]->count);
/* NOTE(review): i advances for every word read (duplicates included) and
 * is not bounded by the 1000-element array */
i++;
}
/* entries skipped by the f==0 test above still have uninitialized fields */
for(j=0; j< i; j++)
fprintf(output, "%s = %d\n", words[j]->word, words[j]->count);
for(i=0; i<1000; i++){
free(words[i]);
}
/* input and output are never passed to fclose */
return 0;
}
I used several fprintf statements to look at my values and I can see that while str is right, when I reach the line to compare str to the other array struct pointers (str == words[j]->word) during the traversal, words[0]->word is always the same as str and the rest of the words[i]->word values are (null). I am still trying to completely understand mixing pointers and structs; with that said, any thoughts, comments, complaints?
You may be making things a bit harder than necessary, and you are certainly allocating 997 more structures than necessary in the case of your input file. There is no need to allocate all 1000 structs up front. (you are free to do so, it's just a memory management issue). The key is that you only need allocate a new struct each time a unique word is encountered. (in the case of your data file, 3-times). For all other cases, you are simply updating count to add the occurrence for a word you have already stored.
Also, if there is no compelling reason to use a struct, it is just as easy to use an array of pointers-to-char as your pointers to each word, and then a simple array of int [1000] as your count (or frequency) array. Your choice. In the case of two arrays, you only need to allocate for each unique word and never need a separate allocation for each struct.
Putting those pieces together, you could reduce your code (not including the file -- which can be handled by simple redirection) to the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 128, MAXW = 1000 };
/* One unique word and the number of times it has been read. */
struct wordfreq{
int count; /* frequency of word */
char *word; /* pointer to the stored word */
};
int main (void) {
struct wordfreq *words[MAXW] = {0};
char tmp[MAXC] = "";
int n = 0;
/* while < MAXW unique words, read each word in file */
while (n < MAXW && fscanf (stdin, " %s", tmp) == 1) {
int i;
for (i = 0; i < n; i++) /* check against exising words */
if (strcmp (words[i]->word, tmp) == 0) /* if exists, break */
break;
if (i < n) { /* if exists */
words[i]->count++; /* update frequency */
continue; /* get next word */
}
/* new word found, allocate struct and
* allocate storage for word (+ space for nul-byte)
*/
words[n] = malloc (sizeof *words[n]);
words[n]->word = malloc (strlen (tmp) + 1);
if (!words[n] || !words[n]->word) { /* validate ALL allocations */
fprintf (stderr, "error: memory exhausted, words[%d].\n", n);
break;
}
words[n]->count = 0; /* initialize count */
strcpy (words[n]->word, tmp); /* copy new word to words[n] */
words[n]->count++; /* update frequency to 1 */
n++; /* increment word count */
}
for (int i = 0; i < n; i++) { /* for each word */
printf ("%s = %d\n", words[i]->word, words[i]->count);
free (words[i]->word); /* free memory when no longer needed */
free (words[i]);
}
return 0;
}
Example Input File
$ cat dat/wordfile.txt
hello
hi
hello
bonjour
bonjour
hello
Example Use/Output
$ ./bin/filewordfreq <dat/wordfile.txt
hello = 3
hi = 1
bonjour = 2
As with any code that dynamically allocates memory, you will want to validate your use of the memory to ensure you have not written beyond the bounds or based a conditional move or jump on an uninitialized value. In Linux, valgrind is the natural choice (there are similar programs for each OS). Just run your program through it, e.g.:
$ valgrind ./bin/filewordfreqstruct <dat/wordfile.txt
==2000== Memcheck, a memory error detector
==2000== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==2000== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==2000== Command: ./bin/filewordfreqstruct
==2000==
hello = 3
hi = 1
bonjour = 2
==2000==
==2000== HEAP SUMMARY:
==2000== in use at exit: 0 bytes in 0 blocks
==2000== total heap usage: 6 allocs, 6 frees, 65 bytes allocated
==2000==
==2000== All heap blocks were freed -- no leaks are possible
==2000==
==2000== For counts of detected and suppressed errors, rerun with: -v
==2000== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Verify that you free all memory you allocate and that there are no memory errors.
Look things over and let me know if you have any further questions.
Using 2-Arrays Instead of a struct
As mentioned above, sometimes using a storage array and a frequency array can simplify accomplishing the same thing. Whenever you are faced with needing the frequency of any "set", your first thought should be a frequency array. It is nothing more than an array of the same size as the number of items in your "set" (initialized to 0 at the beginning). The same approach applies: when you add (or find a duplicate of an existing) element in your storage array, you increment the corresponding element in your frequency array by 1. When you are done, each element of your frequency array holds the number of times the corresponding element in your storage array appeared.
Here is an equivalent to the program above.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 128, MAXW = 1000 };
/*
 * Two-array variant: count how often each whitespace-delimited word occurs
 * on stdin, keeping the words in an array of pointers and the counts in a
 * parallel integer array, then print "word = count" for each unique word.
 *
 * At most MAXW unique words are stored.  Words longer than MAXC - 1
 * characters are read in MAXC - 1 character pieces rather than overflowing
 * the buffer.
 *
 * Returns 0.  On allocation failure an error is written to stderr and the
 * words gathered so far are still reported.
 */
int main (void) {
    char *words[MAXW] = {NULL},     /* storage array of pointers to char */
          tmp[MAXC] = "";
    int freq[MAXW] = {0}, n = 0;    /* simple integer frequency array */

    /* while < MAXW unique words, read each word in file
     * (field width 127 must stay MAXC - 1 to leave room for the nul-byte)
     */
    while (n < MAXW && fscanf (stdin, " %127s", tmp) == 1) {
        int i;

        for (i = 0; i < n; i++)                 /* check against existing words */
            if (strcmp (words[i], tmp) == 0)    /* if exists, break */
                break;

        if (i < n) {                            /* if exists */
            freq[i]++;                          /* update frequency */
            continue;                           /* get next word */
        }

        /* new word found, allocate storage (+ space for nul-byte) */
        words[n] = malloc (strlen (tmp) + 1);
        if (!words[n]) {                        /* validate ALL allocations */
            fprintf (stderr, "error: memory exhausted, words[%d].\n", n);
            break;
        }

        strcpy (words[n], tmp);                 /* copy new word to words[n] */
        freq[n]++;                              /* update frequency to 1 */
        n++;                                    /* increment word count */
    }

    for (int i = 0; i < n; i++) {               /* for each word */
        printf ("%s = %d\n", words[i], freq[i]);    /* output word + freq */
        free (words[i]);                        /* free memory when no longer needed */
    }

    return 0;
}
Using this approach, you eliminate 1/2 of your memory allocations by using a statically declared frequency array for your count. Either way is fine, it is largely up to you.

Storing and accessing data in memory using pointers from txt file

So I'm currently working on a project that uses data from a txt file. The user is prompted for the filename, and the first two lines of the txt file are integers that essentially contain the row and column values of the txt file.
There are two things that are confusing me when writing this program the way my instructor is asking. For the criteria, she says:
read in the data and place into an array of data and
your code should access memory locations via pointers and pointer arithmetic, no []'s in the code you submit.
The left-most column is an identifier while the rest of the row should be considered as that rows data (floating point values).
An example of what the file might contain is:
3
4
abc123 8.55 5 0 10
cdef123 83.50 10.5 10 55
hig123 7.30 6 0 1.9
My code:
//Creates array for 100 characters for filename
char fileName[100];
printf("Enter the file name to be read from: ");
scanf("%s", fileName);
FILE *myFile;
myFile = fopen(fileName, "r");
//Checks if file opened correctly
if (myFile == NULL)
{
printf("Error opening file\n"); //full file name must be entered
}
else {
printf("File opened successfully\n");
}
//gets value of records and value per records from file
//This will be the first 2 lines from line
fscanf(myFile, "%d %d", &records, &valuesPerRecords);
//printf("%d %d\n", records, valuesPerRecords); //Check int values from file
int counter = 0;
char *ptr_data;
ptr_data = (char*)malloc(records*(valuesPerRecords));
int totalElements = records*(valuesPerRecords);
/*If malloc can't allocate enough space, print error*/
if (ptr_data == NULL) {
printf("Error\n");
exit(-1);
}
int counter;
for (counter = 0; counter < totalElements; counter++){
fscanf(myFile, "%s", &ptr_data);
}
so I'm wondering if so far, I'm on the right track. I can't seem to think of a way to have the first column read in as a string, while the rest is read in as integers. I'll also have to use the stored values later and sort them but that's a different problem for a later date.
First off, your prof apparently wants you to become familiar with walking a pointer through a collection of both strings (the labels) and numbers (the floating-point values) using pointer arithmetic without using array indexing. A solid pointer familiarity assignment.
To handle the labels you can use a pointer to pointer to type char (a double pointer) as each pointer will point to an array of chars. You can declare and allocate pointers for labels as follows. (this assumes you have already read the rows and cols values from the input file)
char buf[MAXC] = "", /* temporary line buffer */
**labels = NULL, /* collection of labels */
**lp = NULL; /* pointers to walk labels */
...
/* allocate & validate cols char* pointers */
if (!(labels = calloc (rows, sizeof *labels))) {
fprintf (stderr, "error: virtual memory exhausted.\n");
return 1;
}
You can do the same thing for your pointer values, except you only need a pointer to type double as you will simply need to allocate for a collection of doubles.
double *mtrx = NULL, /* collection of numbers */
*p; /* pointers to walk numbers */
...
nptrs = rows * cols; /* set number of poiners required */
/* allocate & validate nptrs doubles */
if (!(mtrx = calloc (nptrs, sizeof *mtrx))) {
fprintf (stderr, "error: virtual memory exhausted.\n");
return 1;
}
The use of the pointers lp and p are crucial because you cannot increment either labels or mtrx (without saving the original address) because doing so will lose the pointer to the start of the memory allocated to each, immediately causing a memory leak (you have no way to free the block) and preventing you from ever being able to access the beginning again. Each time you need to walk over labels or mtrx just assign the start address to the pointer, e.g.
p = mtrx; /* set pointer p to mtrx */
lp = labels; /* set poiners lp to labels */
Now you are free to read and parse the lines in any manner you choose, but I would strongly recommend using line-oriented input functions to read each line into a temporary line buffer, and then parsing the values you need using sscanf. This has many advantages over reading with fscanf alone. After you read each line, you can parse/validate each value before allocating space for the strings and assigning the values.
(note: I cheat below with a single sscanf call, where you should actually assign a char* pointer to buf, read the label, then loop cols number of times (perhaps using strtok/strtod) checking each value and assigning to mtrx, -- that is left to you)
/* read each remaining line, allocate/fill pointers */
while (ndx < rows && fgets (buf, MAXC, fp)) {
if (*buf == '\n') continue; /* skip empty lines */
char label[MAXC] = ""; /* temp storage for labels */
double val[cols]; /* temp storage for numbers */
if (sscanf (buf, "%s %lf %lf %lf %lf", /* parse line */
label, &val[0], &val[1], &val[2], &val[3]) ==
(int)(cols + 1)) {
*lp++ = strdup (label); /* alloc/copy label */
for (i = 0; i < cols; i++) /* alloc/copy numbers */
*p++ = val[i];
ndx++; /* increment index */
}
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
Then it is simply a matter of looping over the values again, using or outputting as needed, and then freeing the memory you allocated. You could do that with something similar to:
p = mtrx; /* reset pointer p to mtrx */
lp = labels; /* reset poiners lp to labels */
for (i = 0; i < rows; i++) {
printf (" %-10s", *lp);
free (*lp++);
for (j = 0; j < cols; j++)
printf (" %7.2lf", *p++);
putchar ('\n');
}
free (mtrx); /* free pointers */
free (labels);
That's basically one of many approaches. Putting it all together, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 512 }; /* constants for max chars per-line */
/*
 * Read a table of labelled rows and print it back, walking all storage with
 * pointers and pointer arithmetic (no array indexing).
 *
 * Input comes from the file named by argv[1], or stdin when no argument is
 * given.  The first two numeric lines give the row and column counts; each
 * later line holds a label followed by at least `cols` floating-point
 * values.  Blank lines and lines with fewer than `cols` values are skipped.
 * Any column count is handled, because the values are converted one at a
 * time with strtod instead of a fixed-arity sscanf.
 *
 * Only the rows actually read are printed.  Returns 0 on success, 1 if the
 * file cannot be opened, the counts are missing/too large, or the initial
 * allocations fail.
 */
int main (int argc, char **argv) {
    char buf[MAXC] = "",        /* temporary line buffer */
        **labels = NULL,        /* collection of labels */
        **lp = NULL;            /* pointer to walk labels */
    double *mtrx = NULL,        /* collection of numbers */
           *p = NULL;           /* pointer to walk numbers */
    size_t i, j, ndx = 0, rows = 0, cols = 0, nptrs = 0;
    int rc = 1;                 /* exit status, set to 0 on success */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (fgets (buf, MAXC, fp))   /* get rows, ignore blank lines */
        if (sscanf (buf, "%zu", &rows) == 1)
            break;
    while (fgets (buf, MAXC, fp))   /* get cols, ignore blank lines */
        if (sscanf (buf, "%zu", &cols) == 1)
            break;

    if (!rows || !cols) {   /* validate rows & cols > 0 */
        fprintf (stderr, "error: rows and cols values not found.\n");
        goto done;
    }
    if (rows > (size_t)-1 / cols) {     /* rows * cols must not wrap */
        fprintf (stderr, "error: rows * cols is too large.\n");
        goto done;
    }
    nptrs = rows * cols;    /* set number of doubles required */

    /* allocate & validate nptrs doubles */
    if (!(mtrx = calloc (nptrs, sizeof *mtrx))) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        goto done;
    }
    /* allocate & validate rows char* pointers */
    if (!(labels = calloc (rows, sizeof *labels))) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        goto done;
    }

    p = mtrx;       /* set pointer p to mtrx */
    lp = labels;    /* set pointer lp to labels */

    /* read each remaining line, allocate/fill pointers */
    while (ndx < rows && fgets (buf, MAXC, fp)) {
        char *sp = buf + strspn (buf, " \t\r\n");   /* start of label */
        size_t len = strcspn (sp, " \t\r\n");       /* label length */
        char *vp = sp + len;                        /* text after the label */
        char *label = NULL;

        if (!len)                       /* skip empty lines */
            continue;

        /* convert cols values straight into this row's slot in mtrx;
         * p is advanced only once the whole row has been accepted
         */
        for (i = 0; i < cols; i++) {
            char *end = NULL;
            double v = strtod (vp, &end);
            if (end == vp)              /* no conversion performed */
                break;
            *(p + i) = v;
            vp = end;
        }
        if (i < cols)                   /* too few values: skip the line */
            continue;

        if (!(label = malloc (len + 1))) {      /* alloc label (+ nul-byte) */
            fprintf (stderr, "error: virtual memory exhausted.\n");
            break;                      /* rows stored so far remain valid */
        }
        memcpy (label, sp, len);        /* copy label */
        *(label + len) = '\0';

        *lp++ = label;                  /* store label */
        p += cols;                      /* commit the row of numbers */
        ndx++;                          /* increment row count */
    }

    if (ndx < rows)
        fprintf (stderr, "warning: expected %zu rows, read %zu.\n", rows, ndx);

    p = mtrx;       /* reset pointer p to mtrx */
    lp = labels;    /* reset pointer lp to labels */

    for (i = 0; i < ndx; i++) {     /* only rows actually stored */
        printf (" %-10s", *lp);
        free (*lp++);
        for (j = 0; j < cols; j++)
            printf (" %7.2lf", *p++);
        putchar ('\n');
    }
    rc = 0;

done:
    free (mtrx);    /* free pointers (free (NULL) is a no-op) */
    free (labels);
    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return rc;
}
Example Input File Used
$ cat dat/arrinpt.txt
3
4
abc123 8.55 5 0 10
cdef123 83.50 10.5 10 55
hig123 7.30 6 0 1.9
Example Use/Output
$ ./bin/arrayptrs <dat/arrinpt.txt
abc123 8.55 5.00 0.00 10.00
cdef123 83.50 10.50 10.00 55.00
hig123 7.30 6.00 0.00 1.90
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you haven't written beyond/outside your allocated block of memory, attempted to read or base a jump on an uninitialized value and finally to confirm that you have freed all the memory you have allocated. For Linux valgrind is the normal choice.
$ valgrind ./bin/arrayptrs <dat/arrinpt.txt
==22210== Memcheck, a memory error detector
==22210== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==22210== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==22210== Command: ./bin/arrayptrs
==22210==
abc123 8.55 5.00 0.00 10.00
cdef123 83.50 10.50 10.00 55.00
hig123 7.30 6.00 0.00 1.90
==22210==
==22210== HEAP SUMMARY:
==22210== in use at exit: 0 bytes in 0 blocks
==22210== total heap usage: 5 allocs, 5 frees, 142 bytes allocated
==22210==
==22210== All heap blocks were freed -- no leaks are possible
==22210==
==22210== For counts of detected and suppressed errors, rerun with: -v
==22210== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
Always confirm All heap blocks were freed -- no leaks are possible and equally important ERROR SUMMARY: 0 errors from 0 contexts. Note: some OS's do not provide adequate leak and error suppression files (the file that excludes system and OS memory from being reported as in use) which will cause valgrind to report that some memory has not yet been freed (despite the fact you have done your job and freed all blocks you allocated and under your control).
Look things over and let me know if you have any questions.

Reading a stream of values from text file in C

I have a text file which may contain one or up to 400 numbers. Each number is separated by a comma and a semicolon is used to indicate end of numbers stream.
At the moment I am reading the text file line by line using the fgets. For this reason I am using a fixed array of 1024 elements (the maximum characters per line for a text file).
This is not the ideal way how to implement this since if only one number is inputted in the text file, an array of 1024 elements will we pointless.
Is there a way to use fgets with the malloc function (or any other method) to increase memory efficiency?
If you are looking into using this in a production code then I would request you to follow the suggestions put in the comments section.
But if your requirement is more for learning or school, then here is a more involved approach.
Pseudo code
1. Find the size of the file in bytes, you can use "stat" for this.
2. Since the file format is known, from the file size, calculate the number of items.
3. Use the number of items to malloc.
Voila! :p
How to find file size
You can use stat as shown below:
#include <sys/stat.h>
#include <stdio.h>
/*
 * Print the size in bytes of the file named "file" in the current directory
 * together with an item-count estimate of size / 2.
 *
 * Returns 0 on success, 1 if stat() fails.
 */
int main(void)
{
    struct stat st;

    if (stat("file", &st) != 0) {
        printf("failed!\n");
        return 1;
    }

    /* st_size is an off_t, not an int: convert explicitly so the argument
     * types match the conversion specifiers */
    printf("fileSize: %lld No. of Items: %lld\n",
           (long long)st.st_size, (long long)(st.st_size / 2));

    /* report the size through stdout only; an exit status is not a data channel */
    return 0;
}
This program, when run, prints the file size and the estimated number of items:
$> cat file
1;
$> ./a.out
fileSize: 3 No. of Items: 1
$> cat file
1,2,3;
$> ./a.out
fileSize: 7 No. of Items: 3
Disclaimer: Is this approach to minimize the pre-allocated memory an optimal approach? No ways in heaven! :)
Dynamically allocating space for your data is a fundamental tool for working in C. You might as well pay the price to learn. The primary thing to remember is,
"if you allocate memory, you have the responsibility to track its use
and preserve a pointer to the starting address for the block of
memory so you can free it when you are done with it. Otherwise your
code will leak memory like a sieve."
Dynamic allocation is straight forward. You allocate some initial block of memory and keep track of what you add to it. You must test that each allocation succeeds. You must test how much of the block of memory you use and reallocate or stop writing data when full to prevent writing beyond the end of your block of memory. If you fail to test either, you will corrupt the memory associated with your code.
When you reallocate, always reallocate using a temporary pointer, because when realloc fails it returns NULL and leaves the original block of memory untouched. If you assign that NULL directly to your original pointer, you lose the only reference to the block (creating a memory leak and losing access to all previous data in that block). Using a temporary pointer allows you to handle the failure while preserving the original block.
Taking that into consideration, below we initially allocate space for 64 long values (you can easily change the code to handle any type, e.g. int, float, double...). The code then reads each line of data (using getline to dynamically allocate the buffer for each line). strtol is used to parse the buffer assigning values to the array. idx is used as an index to keep track of how many values have been read, and when idx reaches the current nmax, array is reallocated twice as large as it previously was and nmax is updated to reflect the change. The reading, parsing, checking and reallocating continues for every line of data in the file. When done, the values are printed to stdout, showing the 400 random values read from the test file formatted as 353,394,257,...293,58,135;
To keep the read loop logic clean, I've put the error checking for the strtol conversion into a function xstrtol, but you are free to include that code in main() if you like. The same applies to the realloc_long function. To see when the reallocation takes place, you can compile the code with the -DDEBUG definition. E.g:
gcc -Wall -Wextra -DDEBUG -o progname yoursourcefile.c
The program expects your data filename as the first argument and you can provide an optional conversion base as the second argument (default is 10). E.g.:
./progname datafile.txt [base (default: 10)]
Look over it, test it, and let me know if you have any questions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#define NMAX 64
long xstrtol (char *p, char **ep, int base);
long *realloc_long (long *lp, unsigned long *n);
/*
 * Read delimiter-separated integers from the file named by argv[1] into a
 * dynamically grown array of long, then print every value.
 *
 * argv[2], when present, is the conversion base handed to strtol
 * (default 10).  Blank lines are skipped.  Conversion and reallocation
 * failures terminate the program inside xstrtol / realloc_long.
 *
 * Returns 0 on success, 1 on a usage, file-open or allocation error.
 */
int main (int argc, char **argv)
{
    char *ln = NULL;            /* NULL forces getline to allocate */
    size_t n = 0;               /* buffer size, managed by getline */
    size_t idx = 0;             /* array index counter */
    long *array = NULL;         /* pointer to long */
    unsigned long nmax = NMAX;  /* current allocated element count */
    FILE *fp = NULL;            /* input file pointer */
    int base = argc > 2 ? atoi (argv[2]) : 10;  /* base (default: 10) */

    if (argc < 2) {             /* a filename is required */
        fprintf (stderr, "usage: %s datafile [base (default: 10)]\n",
                 argc > 0 ? argv[0] : "progname");
        return 1;
    }

    /* open / validate file */
    if (!(fp = fopen (argv[1], "r"))) {
        fprintf (stderr, "error: file open failed '%s'.", argv[1]);
        return 1;
    }

    /* allocate array of NMAX long using calloc to initialize to 0 */
    if (!(array = calloc (NMAX, sizeof *array))) {
        fprintf (stderr, "error: memory allocation failed.");
        fclose (fp);
        return 1;
    }

    /* read each line from file - separate into array */
    while (getline (&ln, &n, fp) != -1)
    {
        /* skip leading whitespace; a blank line holds nothing to convert */
        char *p = ln + strspn (ln, " \t\r\n\v\f");

        if (!*p)
            continue;

        /* loop unconditionally rather than on errno: xstrtol exits on any
         * conversion error, and errno may hold a stale value left by an
         * earlier library call */
        for (;;)
        {   /* parse/convert each number in line into array */
            char *ep = NULL;    /* endpointer for strtol */

            array[idx++] = xstrtol (p, &ep, base);

            if (idx == nmax)    /* check capacity / realloc */
                array = realloc_long (array, &nmax);

            /* skip delimiters/move pointer to next digit */
            while (*ep && *ep != '-' && (*ep < '0' || *ep > '9')) ep++;

            if (!*ep)           /* end of line reached */
                break;
            p = ep;
        }
    }

    free (ln);                  /* free memory allocated by getline */
    fclose (fp);                /* close open file stream */

    for (size_t i = 0; i < idx; i++)
        printf (" array[%zu] : %ld\n", i, array[i]);

    free (array);

    return 0;
}
/*
 * Double the capacity of an array of long.
 *
 * lp  block previously obtained from malloc/calloc/realloc (or NULL)
 * n   in: current element count; out: new element count (twice the old
 *     count, or 1 when the old count was 0)
 *
 * The newly added elements are zeroed.  Returns the (possibly moved) block.
 * Never returns NULL: if the new size would overflow or realloc fails, an
 * error is printed and the program exits with EXIT_FAILURE.
 */
long *realloc_long (long *lp, unsigned long *n)
{
    unsigned long oldn = *n;
    unsigned long newn = oldn ? oldn * 2 : 1;   /* 0 can't be doubled */
    long *tmp = NULL;

    /* reject a wrapped element count or a byte size beyond size_t */
    if (newn < oldn || newn > (size_t)-1 / sizeof *lp) {
        fprintf (stderr, "%s() error: reallocation failed.\n", __func__);
        exit (EXIT_FAILURE);
    }

#ifdef DEBUG
    printf ("\n reallocating %lu to %lu\n", oldn, newn);
#endif

    /* realloc through a temporary so lp stays valid on failure */
    tmp = realloc (lp, newn * sizeof *lp);
    if (!tmp) {
        fprintf (stderr, "%s() error: reallocation failed.\n", __func__);
        exit (EXIT_FAILURE);
    }

    memset (tmp + oldn, 0, (newn - oldn) * sizeof *tmp);  /* zero new elements */
    *n = newn;

    return tmp;
}
/*
 * strtol wrapper that terminates the program on a failed conversion.
 *
 * p     text to convert
 * ep    receives the address of the first character not consumed
 * base  numeric base passed through to strtol
 *
 * Returns the converted value.  Calls exit (EXIT_FAILURE) after reporting
 * the problem when strtol signals an error through errno or when no digits
 * were consumed.
 */
long xstrtol (char *p, char **ep, int base)
{
    long value;

    errno = 0;                          /* clear so only strtol's error is seen */
    value = strtol (p, ep, base);

    /* Check for various possible errors */
    if (errno != 0) {
        int out_of_range = (errno == ERANGE) &&
                           (value == LONG_MAX || value == LONG_MIN);

        if (out_of_range || value == 0) {
            perror ("strtol");
            exit (EXIT_FAILURE);
        }
    }

    if (p == *ep) {                     /* nothing was converted */
        fprintf (stderr, "No digits were found\n");
        exit (EXIT_FAILURE);
    }

    return value;
}
Sample Output (with -DDEBUG to show reallocation)
$ ./bin/read_long_csv dat/randlong.txt
reallocating 64 to 128
reallocating 128 to 256
reallocating 256 to 512
array[0] : 353
array[1] : 394
array[2] : 257
array[3] : 173
array[4] : 389
array[5] : 332
array[6] : 338
array[7] : 293
array[8] : 58
array[9] : 135
<snip>
array[395] : 146
array[396] : 324
array[397] : 424
array[398] : 365
array[399] : 205
Memory Error Check
$ valgrind ./bin/read_long_csv dat/randlong.txt
==26142== Memcheck, a memory error detector
==26142== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==26142== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==26142== Command: ./bin/read_long_csv dat/randlong.txt
==26142==
reallocating 64 to 128
reallocating 128 to 256
reallocating 256 to 512
array[0] : 353
array[1] : 394
array[2] : 257
array[3] : 173
array[4] : 389
array[5] : 332
array[6] : 338
array[7] : 293
array[8] : 58
array[9] : 135
<snip>
array[395] : 146
array[396] : 324
array[397] : 424
array[398] : 365
array[399] : 205
==26142==
==26142== HEAP SUMMARY:
==26142== in use at exit: 0 bytes in 0 blocks
==26142== total heap usage: 7 allocs, 7 frees, 9,886 bytes allocated
==26142==
==26142== All heap blocks were freed -- no leaks are possible
==26142==
==26142== For counts of detected and suppressed errors, rerun with: -v
==26142== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)

Resources