Related
I am trying to parse a csv into a dynamically allocated array of structures, however my attempt crashes with a segmentation fault.
Here is the structure of my data:
SO02773202,5087001,0
SO02773203,5087001,0
SO02773204,5087001,0
SO02773205,5087001,0
SO02773206,5087001,14
This is the struct I am parsing the data into:
typedef struct saleslines{
char* salesid;
char* smmcampaignid;
int numberofbottles;
} saleslines_t;
Here is my attempt at parsing the file:
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
char c;
FILE* fp;
fp = fopen(FILENAME, "r"); /* Open the saleslines file */
if(fp == NULL){ /* Crash if file not found */
printf("Error - file not found\n");
return 0;
}
c = getc(fp);
while (c != EOF){
if (c == '\n'){
number_of_lines += 1;
}
c = getc(fp);
}
printf("Number of lines is %d\n", number_of_lines);
saleslines = (saleslines_t*) malloc((number_of_lines * 2) * sizeof(saleslines_t));
/* allocation of the buffer for every line in the File */
char *buf = (char*) malloc(1000);
char *tmp;
if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
{
printf( "File could not be opened.\n" );
}
int i = 0;
while (fgets(buf, 255, fp) != NULL){
if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
buf[strlen (buf) - 1] = '\0';
tmp = strtok(buf, ",");
saleslines[i].salesid = strdup(tmp);
tmp = strtok(NULL, ",");
saleslines[i].smmcampaignid = strdup(tmp);
tmp = strtok(NULL, ",");
saleslines[i].numberofbottles = atoi(tmp);
printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i].salesid , saleslines[i].smmcampaignid, saleslines[i].numberofbottles);
i++;
}
free(buf);
fclose(fp);
printf("Number of lines is %i\n", number_of_lines);
return number_of_lines;
}
For some reason it parses the file and prints the resulting array of structs, however when I call this function immediately after, it crashes with a segfault:
void print_saleslines_struct(saleslines_t* saleslines, int number_of_lines{
int i;
printf("Number of lines is %i", number_of_lines);
for(i = 0; i < number_of_lines; i++){
printf("Salesid:\t %s\n", saleslines[i].salesid);
printf("Campaign:\t %s\n", saleslines[i].smmcampaignid);
printf("# of Bottles:\t %d\n", saleslines[i].numberofbottles);
}
}
I can't seem to find where this memory bug is.
Here is the initialization and main:
saleslines_t* saleslines;
saleslines_summary_t* saleslines_summary;
saleslines_grouped_t* saleslines_grouped;
int number_of_lines = 0;
int* number_of_linesp = &number_of_lines;
/* Main */
int main(){
int chosen_option;
while(1){
printf("What would you like to do?\n");
printf("1. Read saleslines.txt\n");
printf("2. Print saleslines\n");
printf("3. Summarise saleslines\n");
printf("4. Exit the program\n");
scanf("%d", &chosen_option);
switch(chosen_option){
/* case 1 : number_of_lines = read_saleslines_file(saleslines, number_of_lines); break; */
case 1 : number_of_lines = read_saleslines(saleslines, number_of_lines); break;
case 2 : printf("Number of lines is %i", number_of_lines); print_saleslines_struct(saleslines, number_of_lines); break;
case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;
case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;
}
}
return 0;
}
Update
The issue seems to be with my initialization of the array of structures.
When I initialize it like this: saleslines_t* saleslines;
and then malloc like this: saleslines = malloc(number_of_lines + 1 * sizeof(saleslines_t);
I get a segfault.
But if I initialize like this: saleslines[600]; (allocating more than the number of lines in the file), everything works.
How can I get around this? I would like to be able to dynamically allocate the number of entries within the struct array.
Edit 2
Here are the changes as suggested:
int read_saleslines(saleslines_t** saleslines, int number_of_lines);
saleslines_t* saleslines;
int number_of_lines = 0;
int main(){
while(1){
printf("What would you like to do?\n");
printf("1. Read saleslines.txt\n");
printf("2. Print saleslines\n");
printf("3. Summarise saleslines\n");
printf("4. Exit the program\n");
printf("Number of saleslines = %i\n", number_of_lines);
scanf("%d", &chosen_option);
switch(chosen_option){
/* case 1 : number_of_lines = read_saleslines_file(saleslines, number_of_lines); break; */
case 1 : number_of_lines = read_saleslines(&saleslines, number_of_lines); break;
case 2 : printf("Number of lines is %i", number_of_lines); print_saleslines_struct(saleslines, number_of_lines); break;
case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;
case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;
}
}
return 0;
}
int read_saleslines(saleslines_t** saleslines, int number_of_lines)
{
char c;
FILE* fp;
fp = fopen(FILENAME, "r"); /* Open the saleslines file */
if(fp == NULL){ /* Crash if file not found */
printf("Error - file not found\n");
return 0;
}
c = getc(fp);
while (c != EOF){
if (c == '\n'){
number_of_lines += 1;
}
c = getc(fp);
}
fclose(fp);
printf("Number of lines is %d\n", number_of_lines);
*saleslines = (saleslines_t*) malloc((number_of_lines + 1) * sizeof(saleslines_t));
/* allocation of the buffer for every line in the File */
char *buf = malloc(25);
char *tmp;
if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
{
printf( "File could not be opened.\n" );
}
int i = 0;
while (fgets(buf, 25, fp) != NULL){
if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
buf[strlen (buf) - 1] = '\0';
tmp = strtok(buf, ",");
(*saleslines)[i].salesid = strdup(tmp);
tmp = strtok(NULL, ",");
(*saleslines)[i].smmcampaignid = strdup(tmp);
tmp = strtok(NULL, ",");
(*saleslines)[i].numberofbottles = atoi(tmp);
printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i]->salesid , saleslines[i]->smmcampaignid, saleslines[i]->numberofbottles);
i++;
}
free(buf);
fclose(fp);
printf("Number of lines is %i\n", number_of_lines);
return number_of_lines;
}
The program now segfaults after reading the first element in the struct array.
You have a problem with the arguments of read_saleslines(). The first argument should be a pointer to an array of your structs, meaning a double pointer.
In
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
you want to modify where saleslines is pointing. saleslines is a local variable of the function, and the scope is that function. Once you exit read_saleslines(), the variable is "destroyed", meaning that the value it holds it is not accessible anymore. Adding another level of indirection, a pointer, you can modify the variable that's defined outside the function, being that (ugly) global or other. So, change that argument so that the function prototype matches
int read_saleslines(saleslines_t** saleslines, int *);
and change the places where you access it inside the function (adding an * to access it, for example:
saleslines = (saleslines_t*) malloc((number_of_lines * ...
to
*saleslines = (saleslines_t*) malloc((number_of_lines * ...
and
saleslines[i].salesid = strdup(tmp);
to
(*saleslines)[i].salesid = strdup(tmp);
Then add an & where you use the variable outside the function:
number_of_lines = read_saleslines(saleslines, number_of_lines);
changes to
some_var = read_saleslines(&saleslines, &number_of_lines);
That will make you code work.
You have a large number of errors in your code, and with your approach in general. There is no need to make two-passes over the file to determine the number of lines before allocating and then re-reading the file in an attempt to parse the data. Further, there is no need to tokenize each line to separate the comma-separated-values, sscanf() to parse the two strings and one int is sufficient here after reading each line with fgets.
While you are free to pass any mix of parameters you like and return whatever you like, since you are allocating for an array of struct and reading values into the array, it makes sense to return a pointer to the allocated array from your function (or NULL on failure) and simply update a parameter passed as a pointer to make the total number of lines read available back in the caller.
Further, generally you want to open and validate the file in the caller and pass a FILE* parameter passing the open file stream to your function. With that in mind, you could refactor your function as:
/* read saleslines into array of saleslines_t, allocating for
* salesid, and smmcampaignid within each struct. Return pointer
* to allocated array on success with lines updated to hold the
* number of elements, or NULL otherwise.
*/
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
Within your function, you simply need a buffer to hold each line read, a counter to track the number of elements allocated in your array, and a pointer to your array to return. For example, you could do something like the following to handle all three:
char buf[MAXC]; /* buffer to hold line */
size_t maxlines = MINL; /* maxlines allocated */
saleslines_t *sales = NULL; /* pointer to array of struct */
(note: since you are tracking the number of lines read through the pointer lines passed as a parameter, it would make sense to initialize the value at that address to zero)
Now the work of your function begins, you want to read each line into buf and parse the needed information from each line. Since salesid and smmcampaignid are both pointers-to-char in your struct, you will need to allocate a block of memory for each string parsed from the line, copy the string to the new block of memory, and then assign the beginning address for the bock to each of your pointers. To "dynamically" handle allocating elements for your struct, you simply check if the number of lines (*lines) filled equals against the number allocated (maxlines), (or if *lines is zero indicating a need for an initial allocation), and realloc in both cases to either realloc (or newly allocate) storage for your array of struct.
When you realloc you always realloc using a temporary pointer so if realloc fails and returns NULL, you don't overwrite your pointer to the currently allocated block with NULL thereby creating a memory leak.
Putting all that together at the beginning of your function may seem daunting, but it is actually straight forward, e.g.
while (fgets (buf, MAXC, fp)) { /* read each line in file */
char id[MAXC], cid[MAXC]; /* temp arrays to hold strings */
int bottles; /* temp int for numberofbottles */
if (*lines == maxlines || !*lines) { /* check if realloc req'd */
/* always realloc with a temp pointer */
void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
if (!tmp) { /* if realloc fails, original pointer still valid */
perror ("realloc-sales"); /* throw error */
return sales; /* return current pointer */
} /* (don't exit or return NULL) */
sales = tmp; /* assign reallocated block to sales */
/* (optional) zero newly allocated memory */
memset (sales + *lines, 0, maxlines * sizeof *sales);
maxlines *= 2; /* update maxlines allocated */
}
Now you are ready to parse the wanted information from your line with sscanf, and then following a successful parse of information, you can allocate for each of your salesid and smmcampaignid pointers, copy the parsed information to the new blocks of memory assigning the beginning address to each pointer, respectively, e.g.
/* parse needed data from line (sscanf is fine here) */
if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
size_t idlen = strlen (id), /* get lengths of strings */
cidlen = strlen (cid);
sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
if (!sales[*lines].salesid) { /* validate! */
perror ("malloc-sales[*lines].salesid");
break;
}
sales[*lines].smmcampaignid = malloc (cidlen + 1); /* ditto */
if (!sales[*lines].smmcampaignid) {
perror ("malloc-sales[*lines].smmcampaignid");
break;
}
memcpy (sales[*lines].salesid, id, idlen + 1); /* copy strings */
memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
sales[(*lines)++].numberofbottles = bottles; /* assign int */
} /* (note lines counter updated in last assignment) */
(note: you can use strdup to both get the length of each string parsed and allocate sufficient memory to hold the string and assign that to your pointer in one-shot, e.g. sales[*lines].salesid = strdup (id);, but... strdup is not required to be included in C99 or later, so it is just as simple to get the length, allocate length + 1 bytes and then memcpy your string manually to ensure portability. Further, since strdup allocates memory, you must validate the pointer returned -- something overlooked by 99% of those using it.)
That's it, when fgets() fails, you have reached EOF, now simply:
return sales; /* return dynamically allocated array of struct */
}
Putting it altogether in a short, working example that takes the filename to read as the first argument to your program (or reads from stdin by default if no argument is given), you could do:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
#define MINL 2
typedef struct saleslines{
char *salesid;
char *smmcampaignid;
int numberofbottles;
} saleslines_t;
/* read saleslines into array of saleslines_t, allocating for
* salesid, and smmcampaignid within each struct. Return pointer
* to allocated array on success with lines updated to hold the
* number of elements, or NULL otherwise.
*/
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
char buf[MAXC]; /* buffer to hold line */
size_t maxlines = MINL; /* maxlines allocated */
saleslines_t *sales = NULL; /* pointer to array of struct */
*lines = 0; /* zero lines */
while (fgets (buf, MAXC, fp)) { /* read each line in file */
char id[MAXC], cid[MAXC]; /* temp arrays to hold strings */
int bottles; /* temp int for numberofbottles */
if (*lines == maxlines || !*lines) { /* check if realloc req'd */
/* always realloc with a temp pointer */
void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
if (!tmp) { /* if realloc fails, original pointer still valid */
perror ("realloc-sales"); /* throw error */
return sales; /* return current pointer */
} /* (don't exit or return NULL) */
sales = tmp; /* assign reallocated block to sales */
/* (optional) zero newly allocated memory */
memset (sales + *lines, 0, maxlines * sizeof *sales);
maxlines *= 2; /* update maxlines allocated */
}
/* parse needed data from line (sscanf is fine here) */
if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
size_t idlen = strlen (id), /* get lengths of strings */
cidlen = strlen (cid);
sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
if (!sales[*lines].salesid) { /* validate! */
perror ("malloc-sales[*lines].salesid");
break;
}
sales[*lines].smmcampaignid = malloc (cidlen + 1); /* ditto */
if (!sales[*lines].smmcampaignid) {
perror ("malloc-sales[*lines].smmcampaignid");
break;
}
memcpy (sales[*lines].salesid, id, idlen + 1); /* copy strings */
memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
sales[(*lines)++].numberofbottles = bottles; /* assign int */
} /* (note lines counter updated in last assignment) */
}
return sales; /* return dynamically allocated array of struct */
}
int main (int argc, char **argv) {
saleslines_t *sales = NULL; /* pointer to saleslines_t */
size_t nlines;
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
sales = read_saleslines (fp, &nlines); /* read saleslines */
if (fp != stdin) fclose (fp); /* close file if not stdin */
for (size_t i = 0; i < nlines; i++) { /* loop over each */
printf ("sales[%2zu]: %s %s %2d\n", i, sales[i].salesid,
sales[i].smmcampaignid, sales[i].numberofbottles);
free (sales[i].salesid); /* free salesid */
free (sales[i].smmcampaignid); /* free smmcampaignid */
}
free (sales); /* free sales */
return 0;
}
Example Use/Output
$ ./bin/saleslines dat/saleslines.txt
sales[ 0]: SO02773202 5087001 0
sales[ 1]: SO02773203 5087001 0
sales[ 2]: SO02773204 5087001 0
sales[ 3]: SO02773205 5087001 0
sales[ 4]: SO02773206 5087001 14
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to insure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/saleslines dat/saleslines.txt
==19819== Memcheck, a memory error detector
==19819== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==19819== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==19819== Command: ./bin/saleslines dat/saleslines.txt
==19819==
sales[ 0]: SO02773202 5087001 0
sales[ 1]: SO02773203 5087001 0
sales[ 2]: SO02773204 5087001 0
sales[ 3]: SO02773205 5087001 0
sales[ 4]: SO02773206 5087001 14
==19819==
==19819== HEAP SUMMARY:
==19819== in use at exit: 0 bytes in 0 blocks
==19819== total heap usage: 13 allocs, 13 frees, 935 bytes allocated
==19819==
==19819== All heap blocks were freed -- no leaks are possible
==19819==
==19819== For counts of detected and suppressed errors, rerun with: -v
==19819== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
There is nothing difficult in dynamically allocating for anything. Just take it in small enough pieces that you dot all "I's" and cross all "T's" for each pointer requiring allocation. Look things over and let me know if you have further questions.
I was given 3 arrays and the input for each array is given in a single line with space between each element.
Example input:
3 2 1 1 1
4 3 2
1 1 4 1
So what I am trying to do is to assign all the elements of first line to array 1, second line to array 2 and third line to array 3.
#include <stdio.h>
int main()
{
int a[20],b[20],c[20],d[3];
int k=0;
char temp;
do{
scanf("%d%c", &a[k], &temp);
k++;
} while(temp != '\n');
d[0]=k;
k=0;
do{
scanf("%d%c", &b[k], &temp);
k++;
} while(temp != '\n');
d[1]=k;
k=0;
do{
scanf("%d%c", &c[k], &temp);
k++;
} while(temp != '\n');
d[2]=k;
return 0;
}
This is what I tried, but this code saves all the elements in the first array itself. Any help?
I've just tried your code and it works fine - a, b, c are filled with the numbers entered via stdin.
However, your primary problem is that scanf is not line oriented. You should instead use fgets to read the line in a string and parse it with strtok and sscanf.
Taking the recommendation to use fgets is one thing, putting it into use the first time is quite another. You use fgets (or POSIX getline) because they provide a mechanism for reading an entire line of text into a buffer at once. This eliminates the pitfalls inherent in trying to use scanf for that purpose.
While POSIX getline will handle a line of any length for you, it dynamically allocates storage for the resulting buffer. fgets on the other hand will read only as many characters as can be stored in the size you specify in the fgets call (reserving space for the nul-character, as fgets always provides a nul-terminated buffer)
This means it is up to you to check that a complete line fit into the buffer you provided for fgets use. Essentially you want to check whether the buffer is full and the last character is not the '\n' character. Note, you are not concerned with trimming the trailing newline here, just in checking for its presence to validate whether a complete line was read. So here you can check whether the length of buffer is your max size (minus 1 for the nul-character) and the last character is not '\n'. If those two conditions exist, you have no way of knowing whether the entire line was read (but see the note after this example). A simple approach to the validation whether a full line was read into buf is, e.g.
while (fgets (buf, MAXC, fp)) {
...
size_t len = strlen (buf); /* length for line validation */
/* validate whole line read into buf - exit on error */
if (len == MAXC - 1 && buf[len - 1] != '\n') {
fprintf (stderr, "error: line %d too long.\n", row + 1);
return 1;
}
(note: for the corner-case of a file without a POSIX eof (end-of-file), e.g. without a '\n' following the last line of text, there is a chance you could actually read an exact buffer full of characters and have no trailing '\n', but still have a complete read -- you can check for EOF with a call to getchar() and return the character to the buffer with putchar if it is other than EOF)
Now on to handling your arrays. Rather than declaring separate arrays of 20 int each, instead declare a 2D array of n row of 20 int each. This makes handling the read and indexing much easier.
You also have the problem of having to capture the number of values you store in each row. While you can do a little indexing magic and store the number of values in each row as the first-column value, it is probably a bit easier just to have a separate array of n values where each index corresponds to the number of values store for each row in your 2D array. For example,
int row = 0, /* row count during read */
idx[ROWS] = {0}, /* array holding col count per row */
arr[ROWS][COLS] = {{0}}; /* 2D array holding each line array */
That way, each time you add a value to one of your rows, you simply increment the corresponding value in idx, e.g.
/* fill a value in row, then */
idx[row]++; /* update col-index for array */
With that background, you are finally ready to start filling your array. The approach is straight-forward. You will:
use an outer loop reading a complete line using fgets (buf, MAXC, fp);
initialize inner loop variable (for offset, etc.);
check that a complete line was read (as shown above);
use an inner loop over buf using sscanf to repeatedly parse a single-integer from buf until all integers are read;
(really 4(a.)) (you call sscanf on buf + offset from the beginning), saving the number characters consumed (saved with the %n format specifier to update offset);
update offset with the number of characters consumed, and repeat.
(note: it is up to you to protect your array bounds to make sure you do not attempt to store more integer values in each array than you have storage for, and that you do not try and store more rows than you have storage for. So on each the outer and inner loop you will add a check to limit the number of rows and columns you read to the available storage)
Your read loops implementing the steps above could look like the following:
/* constants for max rows, cols, and chars for read buf */
enum { ROWS = 4, COLS = 20, MAXC = 512 };
...
while (row < ROWS && fgets (buf, MAXC, fp)) { /* read each line */
int col = 0, /* col being filled */
nchr = 0, /* no. chars consumed by sscanf */
offset = 0, /* offset in buf for next sscaf call */
tmp = 0; /* temp var to hold sscanf conversion */
size_t len = strlen (buf); /* length for line validation */
/* validate whole line read into buf - exit on error */
if (len == MAXC - 1 && buf[len - 1] != '\n') {
fprintf (stderr, "error: line %d too long.\n", row + 1);
return 1;
}
while (col < COLS && /* read each value in line into arr */
sscanf (buf + offset, "%d%n", &tmp, &nchr) == 1) {
arr[row][col++] = tmp; /* assign tmp to array */
offset += nchr; /* update offset in buffer */
idx[row]++; /* update col-index for array */
}
row++; /* increment row for next read */
}
Putting it altogether, you could do something like the following:
#include <stdio.h>
#include <string.h>
/* constants for max rows, cols, and chars for read buf */
enum { ROWS = 4, COLS = 20, MAXC = 512 };
int main (int argc, char **argv) {
int row = 0, /* row count during read */
idx[ROWS] = {0}, /* array holding col count per row */
arr[ROWS][COLS] = {{0}}; /* 2D array holding each line array */
char buf[MAXC] = ""; /* buffer for fgets */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
while (row < ROWS && fgets (buf, MAXC, fp)) { /* read each line */
int col = 0, /* col being filled */
nchr = 0, /* no. chars consumed by sscanf */
offset = 0, /* offset in buf for next sscaf call */
tmp = 0; /* temp var to hold sscanf conversion */
size_t len = strlen (buf); /* length for line validation */
/* validate whole line read into buf - exit on error */
if (len == MAXC - 1 && buf[len - 1] != '\n') {
fprintf (stderr, "error: line %d too long.\n", row + 1);
return 1;
}
while (col < COLS && /* read each value in line into arr */
sscanf (buf + offset, "%d%n", &tmp, &nchr) == 1) {
arr[row][col++] = tmp; /* assign tmp to array */
offset += nchr; /* update offset in buffer */
idx[row]++; /* update col-index for array */
}
row++; /* increment row for next read */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
for (int i = 0; i < row; i++) { /* output the arrays read */
for (int j = 0; j < idx[i]; j++)
printf (" %3d", arr[i][j]);
putchar ('\n');
}
return 0;
}
Note: rather than using a fixed size 2D array, you can take things a step further and instead use a pointer-to-pointer-to-int (e.g. a double-pointer, int **arr;) and dynamically allocate and reallocate pointers for rows, as required, and dynamically allocate and reallocate the storage assigned to each pointer to handle any number of integer values per-row. While it is not that much additional work, that is left as an exercise to you when you get to dynamic allocation in your studies. What you are doing with an differing number of column values per-row is creating a jagged array.
Example Input File
Using your input file for testing, e.g.:
$ cat dat/3arr.txt
3 2 1 1 1
4 3 2
1 1 4 1
Example Use/Output
Produces the following output:
$ ./bin/arr_jagged dat/3arr.txt
3 2 1 1 1
4 3 2
1 1 4 1
Look things over and let me know if you have further questions.
I am trying to save each line of a text file into an array.
They way I am doing it and works fine so far is this :
char *lines[40];
char line[50];
int i = 0 ;
char* eof ;
while( (eof = fgets(line, 50, in)) != NULL )
{
lines[i] = strdup(eof); /*Fills the array with line of the txt file one by one*/
i++;
}
My text file has 40 lines , which I am accessing with a for loop
for( j = 0; j <= 39 ; j++)
{ /*Do something to each line*/}.
So far so good. My problem is that i define the size of the array lines
for the a text file that has 40 lines. I tried to count the lines and then define the size but I am getting segmentation fault.
My approach:
int count=1 ; char c ;
for (c = getc(in); c != EOF; c = getc(in))
if (c == '\n') // Increment count if this character is newline
count = count + 1;
printf("\nNUMBER OF LINES = %d \n",count);
char* lines[count];
Any ideas ?
As an aside, I tested the exact code you show above to get line count (by counting newline characters), on a file containing more than 1000 lines, and with some lines 4000 char long. The problem is not there.
The seg fault is therefore likely due to the way you are allocating memory for each line buffer. You may be attempting to write a long line to a short buffer. (maybe I missed it in your post, but could not find where you addressed line length?)
Two things useful when allocating memory for storing strings in a file are number of lines, and the maximum line length in the file. These can be used to create the array of char arrays.
You can get both line count and longest line by looping on fgets(...): (a variation on your theme, essentially letting fgets find the newlines)
int countLines(FILE *fp, int *longest)
{
int i=0;
int max = 0;
char line[4095]; // max for C99 strings
*longest = max;
while(fgets(line, 4095, fp))
{
max = strlen(line);
if(max > *longest) *longest = max;//record longest
i++;//track line count
}
return i;
}
int main(void)
{
int longest;
char **strArr = {0};
FILE *fp = fopen("C:\\dev\\play\\text.txt", "r");
if(fp)
{
int count = countLines(fp, &longest);
printf("%d", count);
GetKey();
}
// use count and longest to create memory
strArr = create2D(strArr, count, longest);
if(strArr)
{
//use strArr ...
//free strArr
free2D(strArr, lines);
}
......and so on
return 0;
}
char ** create2D(char **a, int lines, int longest)
{
int i;
a = malloc(lines*sizeof(char *));
if(!a) return NULL;
{
for(i=0;i<lines;i++)
{
a[i] = malloc(longest+1);
if(!a[i]) return NULL;
}
}
return a;
}
void free2D(char **a, int lines)
{
int i;
for(i=0;i<lines;i++)
{
if(a[i]) free(a[i]);
}
if(a) free(a);
}
There are many ways to approach this problem. Either declare a static 2D array or char (e.g. char lines[40][50] = {{""}};) or declare a pointer to array of type char [50], which is probably the easiest for dynamic allocation. With that approach you only need a single allocation. With constant MAXL = 40 and MAXC = 50, you simply need:
char (*lines)[MAXC] = NULL;
...
lines = malloc (MAXL * sizeof *lines);
Reading each line with fgets is a simple task of:
while (i < MAXL && fgets (lines[i], MAXC, fp)) {...
When you are done, all you need to do is free (lines); Putting the pieces together, you can do something like:
#include <stdio.h>
#include <stdlib.h>
enum { MAXL = 40, MAXC = 50 };
int main (int argc, char **argv) {
char (*lines)[MAXC] = NULL; /* pointer to array of type char [MAXC] */
int i, n = 0;
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* valdiate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if (!(lines = malloc (MAXL * sizeof *lines))) { /* allocate MAXL arrays */
fprintf (stderr, "error: virtual memory exhausted 'lines'.\n");
return 1;
}
while (n < MAXL && fgets (lines[n], MAXC, fp)) { /* read each line */
char *p = lines[n]; /* assign pointer */
for (; *p && *p != '\n'; p++) {} /* find 1st '\n' */
*p = 0, n++; /* nul-termiante */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
/* print lines */
for (i = 0; i < n; i++) printf (" line[%2d] : '%s'\n", i + 1, lines[i]);
free (lines); /* free allocated memory */
return 0;
}
note: you will also want to check to see if the whole line was read by fgets each time. (say you had a long line of more than 38 chars in the file). You do this by checking whether *p is '\n' before overwriting with the nul-terminating character. (e.g. if (*p != '\n') { int c; while ((c = getchar()) != '\n' && c != EOF) {} }). That insures the next read with fgets will begin with the next line, instead of the remaining characters in the current line.
To include the check you could do something similar to the following (note: I changed the read loop counter from i to n to eliminate the need for assigning n = i; following the read loop).
while (n < MAXL && fgets (lines[n], MAXC, fp)) { /* read each line */
char *p = lines[n]; /* assign pointer */
for (; *p && *p != '\n'; p++) {} /* find 1st '\n' */
if (*p != '\n') { /* check line read */
int c; /* discard remainder of line with getchar */
while ((c = fgetc (fp)) != '\n' && c != EOF) {}
}
*p = 0, n++; /* nul-termiante */
}
It is up to you whether you discard or keep the remainder of lines that exceed the length of your array. However, it is a good idea to always check. (the lines of text in my example input below are limited to 17-chars so there was no possibility of a long line, but you generally cannot guarantee the line length.
Example Input
$ cat dat/40lines.txt
line of text - 1
line of text - 2
line of text - 3
line of text - 4
line of text - 5
line of text - 6
...
line of text - 38
line of text - 39
line of text - 40
Example Use/Output
$ ./bin/fgets_ptr2array <dat/40lines.txt
line[ 1] : 'line of text - 1'
line[ 2] : 'line of text - 2'
line[ 3] : 'line of text - 3'
line[ 4] : 'line of text - 4'
line[ 5] : 'line of text - 5'
line[ 6] : 'line of text - 6'
...
line[38] : 'line of text - 38'
line[39] : 'line of text - 39'
line[40] : 'line of text - 40'
Now include a the length check in code and add a long line to the input, e.g.:
$ cat dat/40lines+long.txt
line of text - 1
line of text - 2
line of text - 3 + 123456789 123456789 123456789 123456789 65->|
line of text - 4
...
Rerun the program and you can confirm you have now protected against long lines in the file mucking up your sequential read of lines from the file.
Dynamically Reallocating lines
If you have an unknown number of lines in your file and you reach your initial allocation of 40 in lines, then all you need do to keep reading additional lines is realloc storage for lines. For example:
int i, n = 0, maxl = MAXL;
...
while (fgets (lines[n], MAXC, fp)) { /* read each line */
char *p = lines[n]; /* assign pointer */
for (; *p && *p != '\n'; p++) {} /* find 1st '\n' */
*p = 0; /* nul-termiante */
if (++n == maxl) { /* if limit reached, realloc lines */
void *tmp = realloc (lines, 2 * maxl * sizeof *lines);
if (!tmp) { /* validate realloc succeeded */
fprintf (stderr, "error: realloc - virtual memory exhausted.\n");
break; /* on failure, exit with existing data */
}
lines = tmp; /* assign reallocated block to lines */
maxl *= 2; /* update maxl to reflect new size */
}
}
Now it doesn't matter how many lines are in your file, you will simply keep reallocating lines until your entire files is read, or you run out of memory. (note: currently the code reallocates twice the current memory for lines on each reallocation. You are free to add as much or as little as you like. For example, you could allocate maxl + 40 to simply allocate 40 more lines each time.
Edit In Response To Comment Inquiry
If you do want to use a fixed increase in the number of lines rather than scaling by some factor, you must allocate for a fixed number of additional lines (the increase times sizeof *lines), you can't simple add 40 bytes, e.g.
void *tmp = realloc (lines, (maxl + 40) * sizeof *lines);
if (!tmp) { /* validate realloc succeeded */
fprintf (stderr, "error: realloc - virtual memory exhausted.\n");
break; /* on failure, exit with existing data */
}
lines = tmp; /* assign reallocated block to lines */
maxl += 40; /* update maxl to reflect new size */
}
Recall, lines is a pointer-to-array of char[50], so for each additional line you want to allocate, you must allocate storage for 50-char (e.g. sizeof *lines), so the fixed increase by 40 lines will be realloc (lines, (maxl + 40) * sizeof *lines);, then you must accurately update your max-lines-allocated count (maxl) to reflect the increase of 40 lines, e.g. maxl += 40;.
Example Input
$ cat dat/80lines.txt
line of text - 1
line of text - 2
...
line of text - 79
line of text - 80
Example Use/Output
$ ./bin/fgets_ptr2array_realloc <dat/80lines.txt
line[ 1] : 'line of text - 1'
line[ 2] : 'line of text - 2'
...
line[79] : 'line of text - 79'
line[80] : 'line of text - 80'
Look it over and let me know if you have any questions.
I'm going to be getting input in the following format and I wish to store it into an arraylist or maybe even a linked list (whichever one is easier to implement):
3,5;6,7;8,9;11,4;
I want to be able to put the two numbers before the ; into a structure and store these. For example, I want 3,5 to be grouped together, and 6,7 to be grouped together.
I'm unsure as to how to read the input and obtain each pair and store it. The input I'm going to get can be fairly large (up to 60-70mB).
I have tried to use strtok() and strtol() however I just can't seem to get the correct implementation.
Any help would be great
EDIT:
What I have tried to do up til now is use this piece of code to read the input:
char[1000] remainder;
int first, second;
fp = fopen("C:\\file.txt", "r"); // Error check this, probably.
while (fgets(&remainder, 1000, fp) != null) { // Get a line.
while (sscanf(remainder, "%d,%d;%s", first, second, remainder) != null) {
// place first and second into a struct or something
}
}
I fixed the syntax errors in the code, but when I try and compile, it crashes.
With the addition of a single line, the same answer provides a robust and flexible solution to your problem. Take the time to understand what it does. It is not complicated at all, it is simply basic C. In order for you to do your conversion from string to int, you have 2 choices provided by libc, atoi (no error checking) and strtol (with error checking). Your only other alternative is to code a conversion by hand, which given your comments in both versions of this question, isn't what you are looking for.
The following is a good solution. Take the time to learn what it does. Regardless whether you use fgets or getline, the approach to the problem is the same. Let me know what your questions are:
/* read unliminted number of int values into array from stdin
(semicolon or comma separated values, pair every 2 values)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
int main () {
char *ln = NULL; /* NULL forces getline to allocate */
size_t n = 0; /* max chars to read (0 - no limit) */
ssize_t nchr = 0; /* number of chars actually read */
int *numbers = NULL; /* array to hold numbers */
size_t nmax = NMAX; /* check for reallocation */
size_t idx = 0; /* numbers array index */
if (!(numbers = calloc (NMAX, sizeof *numbers))) {
fprintf (stderr, "error: memory allocation failed.");
return 1;
}
/* read each line from stdin - dynamicallly allocated */
while ((nchr = getline (&ln, &n, stdin)) != -1)
{
char *p = ln; /* pointer for use with strtol */
char *ep = NULL;
errno = 0;
while (errno == 0)
{
/* parse/convert each number on stdin */
numbers[idx] = strtol (p, &ep, 10);
/* note: overflow/underflow checks omitted */
/* if valid conversion to number */
if (errno == 0 && p != ep)
{
idx++; /* increment index */
if (!ep) break; /* check for end of str */
}
/* skip delimiters/move pointer to next digit */
while (*ep && (*ep <= '0' || *ep >= '9')) ep++;
if (*ep)
p = ep;
else
break;
/* reallocate numbers if idx = nmax */
if (idx == nmax)
{
int *tmp = realloc (numbers, 2 * nmax * sizeof *numbers);
if (!tmp) {
fprintf (stderr, "Error: struct reallocation failure.\n");
exit (EXIT_FAILURE);
}
numbers = tmp;
memset (numbers + nmax, 0, nmax * sizeof *numbers);
nmax *= 2;
}
}
}
/* free mem allocated by getline */
if (ln) free (ln);
/* show values stored in array */
size_t i = 0;
for (i = 0; i < idx; i++)
if ( i % 2 == 1 ) /* pair ever 2 values */
printf (" numbers[%2zu] numbers[%2zu] %d, %d\n", i-1, i, numbers[i-1], numbers[i]);
/* free mem allocated to numbers */
if (numbers) free (numbers);
return 0;
}
Output
$ echo "3,5;6,7;8,9;11,4;;" | ./bin/parsestdin2
numbers[ 0] numbers[ 1] 3, 5
numbers[ 2] numbers[ 3] 6, 7
numbers[ 4] numbers[ 5] 8, 11
I have a file of DNA sequences and an associated IDs and I'm trying to save the even lines (the IDs) to one array and the odd lines (the sequences) to another. Then I want to compare all of the sequences with each other to find the unique sequences. For example is Seq A is AGTCGAT and Seq B is TCG, Seq B is not unique. I want to save the unique sequences and their IDs to an output file and id the sequences are not unique, only save the ID to the output file and print "Deleting sequence with ID: " to the console. I'm pretty much done but Im running into a few problems. I tried printing out the two separate arrays, sequences[] and headers[], but for some reason, they only contain two out of the 5 strings (the file has 5 IDs and 5 headers). And then the information isn't printing out to the screen the way it's supposed to.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(){
int total_seq = 20000;
char seq[900];
char** headers;
char** sequences;
int sequence_size = 0;
headers = malloc(total_seq * sizeof(char*));
sequences = malloc(total_seq * sizeof(char*));
int index;
for(index = 0; index < total_seq; index++){
headers[index] = malloc(900 * sizeof(char));
sequences[index] = malloc(900 * sizeof(char));
}
FILE *dna_file;
FILE *new_file;
dna_file = fopen("inabc.fasta", "r");
new_file = fopen("output.fasta", "w");
if (dna_file == NULL){
printf("Error");
return 0;
}
int i = 0;
int j = 0;
while(fgets(seq, sizeof seq, dna_file)){
if(i%2 == 0){
strcpy(headers[i/2], seq);
i++;
}
else{
strcpy(sequences[i/2], seq);
i++;
}
}
fclose(dna_file);
sequence_size = i/2;
char* result;
for(i=0; i < sequence_size; i++){
for(j=0; j < sequence_size; j++){
if(i==j){
continue;
}
result = strstr(sequences[j], sequences[i]);
if(result== NULL){
fprintf(new_file,"%s", headers[i]);
fprintf(new_file,"%s", sequences[i]);
}
else{
printf("Deleting sequence with id: %s \n", headers[i]);
printf(sequences[i]);
fprintf(new_file,"%s", headers[i]);
}
}
}
The sample file inabc.fasta is short but the actual file I'm working with is very long, which is why I've used malloc. Any help would be appreciated!
EDIT: The sample input file inabc.fasta:
cat inabc.fasta
> id1 header1
abcd
> id2 header2
deghj
> id3 header3
defghijkabcd
> id4 header4
abcd
> id5 header5
xcvbnnmlll
So for this sample, sequences 1 and 4 will not be saved to the output file
This:
while( fgets(seq, sizeof seq, dna_file) ) {
if( i % 2 == 0 ){
strcpy(headers[i], seq);
i++;
}
else {
strcpy(sequences[i-1], seq);
i++;
}
}
is going to skip every other element in your arrays:
When i == 0, it'll store in headers[0]
When i == 1, it'll store in sequences[0]
When i == 2, it'll store in headers[2]
When i == 3, it'll store in sequences[2]
and so on.
Then you do:
sequence_size = i/2;
so if you loop sequence_size times, you'll only make it half way through the piece of the array you've written to, and every other element you print will be uninitialized. This is why you're only printing half the elements (if you have 5 elements, then i / 2 == 2, and you'll only see 2), and why it "isn't printing out to the screen the way it's supposed to".
You'll be better off just using either two separate counters when you read in your input, and a separate variable to store whether you're on an odd or even line of input.
For instance:
int i = 0, j = 0, even = 1;
while( fgets(seq, sizeof seq, dna_file) ) {
if( even ){
strcpy(headers[i++], seq);
even = 0;
}
else {
strcpy(sequences[j++], seq);
even = 1;
}
}
Here it's better to have two variables, since if you read in an odd number of lines, your two arrays will contain different numbers of read elements.
In addition to the other comments, there are a few logic errors in your output routine you need to correct. Below, I have left your code in comments, so you can follow the changes and additions made.
There are several ways you can approach memory management a bit more efficiently, and provide a way to cleanly iterate over your data without tracking counters throughout your code. Specifically, when you allocate your array of pointers-to-pointer-to-char, use calloc instead of malloc so that your pointers are initialized to zero/NULL. This allows you to easily iterate over only those pointers that have been assigned.
There is no need to allocate 20000 900 char arrays (times 2) before reading your data. Allocate your pointers (or start with a smaller number of pointers say 256 and realloc as needed), then simply allocate for each element in headers and sequences as needed within your read loop. Further, instead of allocating 1800 chars (900 * 2) every time you add an element to headers and sequences, just allocate the memory required to hold the data. This can make a huge difference. For example, you allocate 20000 * 900 * 2 = 36000000 bytes (36M) before you start reading this small set of sample data. Even allocating all 20000 pointers, allocating memory as needed for this sample data, limits memory usage to 321,246 bytes (less that 1% of 36M)
The logic in your write loop will not work. You must move your write of the data outside of the inner loop. Otherwise you have no way of testing whether to Delete a duplicate entry. Further testing result does not provide a way to skip duplicates. result changes with every iteration of the inner loop. You need to both test and set a flag that will control whether or not to delete the duplicate once you leave the inner loop.
Finally, since you are allocating memory dynamically, you are responsible for tracking the memory allocated and freeing the memory when no longer needed. Allocating your array of pointers with calloc makes freeing the memory in use a snap.
Take a look at the changes and additions I'm made to your code. Understand the changes and let me know if you have any questions. Note: there are many checks omitted for sake of not cluttering the code. You should at minimum make sure you do not exceed the 20000 pointers allocated when run on a full dataset, and realloc as needed. You should also check that strdup succeeded (it's allocating memory), although you get some assurance comparing the headers and sequences index count. I'm sure there are many more that make sense. Good luck.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXSEQ 20000
#define SZSEQ 900
int main ()
{
int total_seq = MAXSEQ; /* initialize all variables */
char seq[SZSEQ] = {0};
char **headers = NULL; /* traditionally variables */
char **sequences = NULL; /* declared at beginning */
// char *result = NULL;
// int sequence_size = 0;
size_t len = 0;
int hidx = 0;
int sidx = 0;
// int idx = 0; /* (see alternative in fgets loop) */
int i = 0;
int j = 0;
int del = 0;
/* calloc initilizes to 0 & allows iteration on addresses */
headers = calloc (total_seq, sizeof (*headers));
sequences = calloc (total_seq, sizeof (*sequences));
/* allocate as needed if possible - see read loop */
// for (index = 0; index < total_seq; index++) {
// headers[index] = malloc (900 * sizeof (char));
// sequences[index] = malloc (900 * sizeof (char));
// }
FILE *dna_file = NULL;
FILE *new_file = NULL;
dna_file = fopen ("inabc.fasta", "r");
new_file = fopen ("output.fasta", "w+"); /* create if not existing "w+" */
if (!dna_file || !new_file) {
fprintf (stderr, "Error: file open failed.\n");
return 1; /* 1 indicates error condition */
}
while (fgets (seq, sizeof (seq), dna_file)) /* read dna_file & separate */
{
len = strlen (seq); /* strip newline from seq end */
if (seq[len-1] == '\n') /* it's never good to leave \n */
seq[--len] = 0; /* scattered through your data */
/* if header line has '>' as first char -- use it! */
if (*seq == '>')
headers[hidx++] = strdup (seq); /* strdup allocates */
else
sequences[sidx++] = strdup (seq);
/* alternative using counter if no '>' */
// if (idx % 2 == 0)
// headers[hidx++] = strdup (seq);
// else
// sequences[sidx++] = strdup (seq);
// idx++
}
fclose (dna_file);
if (hidx != sidx)
fprintf (stderr, "warning: hidx:sidx (%d:%d) differ.\n", hidx, sidx);
// sequence_size = (hidx>sidx) ? sidx : hidx; /* protect against unequal read */
//
// for (i = 0; i < sequence_size; i++) {
// for (j = 0; i < sequence_size; i++) {
// if (i == j) {
// continue;
// }
// result = strstr (sequences[j], sequences[i]);
// if (result == NULL) {
// fprintf (new_file, "%s", headers[i]);
// fprintf (new_file, "%s", sequences[i]);
// } else {
// printf ("Deleting sequence with id: %s \n", headers[i]);
// printf (sequences[i]);
// fprintf (new_file, "%s", headers[i]);
// }
// }
// }
/* by using calloc, all pointers except those assigned are NULL */
while (sequences[i]) /* testing while (sequences[i] != NULL) */
{
j = 0;
del = 0;
while (sequences[j])
{
if (i == j)
{
j++;
continue;
}
if (strstr (sequences[j], sequences[i])) /* set delete flag */
{
del = 1;
break;
}
j++;
}
if (del) {
printf ("Deleting id: '%s' with seq: '%s' \n", headers[i], sequences[i]);
// printf (sequences[i]);
fprintf (new_file, "%s\n", headers[i]);
} else {
fprintf (new_file, "%s\n", headers[i]);
fprintf (new_file, "%s\n", sequences[i]);
}
i++;
}
fclose (new_file);
/* free allocated memory - same simple iteration */
i = 0;
while (headers[i])
free (headers[i++]); /* free strings allocated by strdup */
if (headers) free (headers); /* free the array of pointers */
i = 0;
while (sequences[i])
free (sequences[i++]);
if (sequences) free (sequences);
return 0;
}
output:
$ ./bin/dnaio
Deleting id: '> id1 header1' with seq: 'abcd'
Deleting id: '> id4 header4' with seq: 'abcd'
output.fasta:
$ cat output.fasta
> id1 header1
> id2 header2
deghj
> id3 header3
defghijkabcd
> id4 header4
> id5 header5
xcvbnnmlll
memory allocation/free verification:
==21608== HEAP SUMMARY:
==21608== in use at exit: 0 bytes in 0 blocks
==21608== total heap usage: 14 allocs, 14 frees, 321,246 bytes allocated
==21608==
==21608== All heap blocks were freed -- no leaks are possible