Read txt file line by line in C - c

I want to read a txt file, line by line, and each line stores in a different variable:
here is the txt file I want to read
Jenny
Woodbridge Ave
Amber
Exeter street
Michael
Main Street
David
Plainfield ave
and I did like
#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<string.h>
typedef struct info
{
char name[20];
char add[50];
}INFO;
int main(void){
const char *fileName = "test.txt";
FILE *file = fopen(fileName, "r");
INFO* list = (INFO*)malloc(20*sizeof(INFO));
readFromFile(file,list);
fclose(file);
free(list);
return 0;
}
void readFromFile(FILE *file,INFO* list){
int i = 0;
while(!feof(file)){
fscanf(file,"%s %s\n ",(list+i)->name,(list+i)->adds);
i++;
}
}
but I getting
Name: Jenny
Addr: Woodbridge
------------------------------
Name: Ave
Addr: Amber
------------------------------
Name: Exeter
Addr: street
------------------------------
Name: Michael
Addr: Main
------------------------------
Name: Street
Addr: David
------------------------------
Name: Plainfield
Addr: ave
I just edited a little bit
so I need to use fgets to read line by line instead of fscanf() right?

%s specifier reads the input stream until it finds a blank character, in your case as you have 2 words per address, it becomes umbalanced as soon as you try to read it, the second word of the address is read by the next cycle into name, it also has a problem of potencial buffer overflow.
You should use %49[^\n], this specifier reads everything until it finds the newline character, including spaces. The 49 is meant to limit the size of the read line as to avoid the mentioned buffer overflow, in you case you have space for 50 characters, the last character would be for the null terminator.
feof is also not the best way to signal the end of file in this kind of routine, more info in Why is “while ( !feof (file) )” always wrong?.
There are some other issues I address in the comments on the below working sample:
Online demo
int main()
{
//casting malloc may make you program fail, it hides the lack of #include <stdlib.h>
INFO* list = malloc(20 * sizeof *list); //using the the dereferenced variable
//is safer regarding future code refactoring
const char *fileName = "test.txt";
FILE *f = fopen(fileName, "r"); //dont forget to verify the return of fopen
int i = read(f, list);
for (int j = 0; j < i; j++) //test print
{
printf("Name: %s Address: %s\n", list[j].name, list[j].add);
}
}
int read(FILE *file, INFO *list)
{
int i = 0;
//use scanf as stop condition, [] notation is easier to read
//limiting the size in scanf specifiers avoids buffer overflow
while (fscanf(file, "%19s %49[^\n]", list[i].name, list[i].add) == 2)
{
i++;
}
//return i so that you know how many structs were read
return i;
}
Output:
Name: Jenny Address: Woodbridge Ave
Name: Amber Address: Exeter street
Name: Michael Address: Main Street
Name: David Address: Plainfield ave

so I need to use fgets to read line by line instead of fscanf() right?
fscanf(file,"%s %s\n ",.... fails as %s does not read spaces into a string as needed by "Woodbridge Ave" and fails to protect against buffer overrun.
There are many way to solve this task. fgets() is the most clear.
Consider a helper function to read a line and handle fgets() peculiarities of line input. Adjust as needed.
// return 1 on success
// return EOF on end-of-file/input error
// else return 0
int read_line(FILE *file, int sz, char *line) {
if (fgets(line, sz, file) == NULL) {
return EOF; // EOF or rare input error
}
int len = strlen(line);
if (len > 0 && line[len - 1] == '\n') {
line[--len] = '\0'; // lop off potential \n
} else if (len + 1 == sz) { // no \n read, `line` full, so look for rest of input
int ch;
int extra = 0;
while ((ch = fgetc(file)) != '\n' && ch != EOF) {
extra = 1;
}
if (extra) {
return 0; // input too long
}
}
return 1;
}
Now read the file in line pairs into a INFO
int read_INFO(FILE *file, int n, INFO *list) {
int i = 0;
while (i < n
&& read_line(file, sizeof list->name, list[i].name) == 1
&& read_line(file, sizeof list->add, list[i].add) == 1) {
i++;
}
return i;
}
Usage
int n = 20;
INFO *list = malloc(sizeof *list * n);
if (list) {
int number_read = read_INFO(file, n, list);
if (number_read > 0) {
// The happy path
}
}

Related

Need help parsing data from .csv file C

I have the following .csv file containing information about the song, artist, release year (if specified) and number of listens:
Look What The Cat Dragged In,Poison,,Look What The Cat Dragged In by Poison,1,0,1,0
Nothin' But A Good Time,Poison,1988,Nothin' But A Good Time by Poison,1,1,21,21
Something To Believe In,Poison,1990,Something To Believe In by Poison,1,1,1,1
Talk Dirty To Me,Poison,1978,Talk Dirty To Me by Poison,1,1,1,1
A Salty Dog,Procol Harum,1969,A Salty Dog by Procol Harum,1,1,1,1
A Whiter Shade of Pale,Procol Harum,1967,A Whiter Shade of Pale by Procol Harum,1,1,3,3
Blurry,Puddle of Mudd,2001,Blurry by Puddle of Mudd,1,1,1,1
Amie,Pure Prairie League,,Amie by Pure Prairie League,1,0,4,0
Another One Bites the Dust,Queen,1980,Another One Bites the Dust by Queen,1,1,102,102
Bicycle Race,Queen,1978,Bicycle Race by Queen,1,1,3,3
Kiss You All Over,Kiss,1978,Kiss You All Over by Kiss,1,1,5,5
The name of the file and the desired year should be given as command line arguments, and the program should print all songs from that specific year.
e.g.: ./a.out music.csv 1978
Output:
Talk dirty to me
Bicycle Race
Kiss You All Over
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX 300
typedef struct {
char song[101], *artist, *line;
long int year;
} music;
int checkYear(char *word)
{
for (int i = 0; i < strlen(word); i++) {
if (!isdigit(word[i]))
return 0;
}
return 1;
}
int main(int argc, char **argv)
{
FILE *fin = fopen(argv[1], "r");
if (!fin)
{
printf("Error opening the file.\n");
return 1;
}
char buf[MAX];
//int nLines = 0; //count the number of lines
//music *array = NULL;
while( fgets(buf, MAX, fin))
{
buf[strcspn(buf, "\n")] = '\0'; // strip the trailing newline
char *word = strtok(buf, ",");
while (word)
{
//printf("Word is : %s\n", word);
if (checkYear(word))
{
//printf("Year : %s\n", word);
music *array = (music *)malloc(sizeof(music));
char *p;
array->year = strtol(word, &p, 10);
if (array->year == atoi(argv[2]))
{
//printf("Year : %ld\t%d\n", array->year, atoi(argv[2]));
if (scanf("%100[^,]", array->song) == 1)
{
printf("Song : %s\n", array->song);
}
}
}
word = strtok(NULL, ",");
}
}
//printf("I've read %d lines\n", nLines);
fclose(fin);
return 0;
}
So far, it's going decent, I can extract the specified year from each line, but now I just need to print the name of the song from those lines (the first token on the line). I thought about using scanf("%[^,]") to read and print everything up until the first comma but it's just stuck in an endless loop. Could you give me an idea? Thanks in advance!
There are multiple problems in the code:
you do not check that enough arguments were passed on the command line, potentially invoking undefined behavior if not.
you do not need to allocate a music structure: you can just parse the first 3 fields, check the year and output the name of the song directly.
strtok() is inappropriate to split fields from a csv file because it treats a sequence of separators as a single separator, which is incorrect and causes invalid parsing if some fields are empty.
sscanf("%[^,]", ...) will fail to convert an empty field.
To split the fields from the csv line, I recommend you use a utility function that behaves like strtok_r() but tailored for csv lines. A simplistic version will stop on , and \n and replace these with a null byte, returning the initial pointer and updating the pointer for the next field. A more advanced version would also handle quotes.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#define MAX 300
char *get_field(char **pp) {
char *p, *start;
for (p = start = *pp; *p; p++) {
if (*p == ',' || *p == '\n') {
*p++ = '\0';
break;
}
}
*pp = p;
return start;
}
int main(int argc, char *argv[]) {
char buf[MAX];
FILE *fin;
char *filename;
char *select_year;
if (argc < 3) {
printf("Missing arguments\n");
return 1;
}
filename = argv[1];
select_year = argv[2];
fin = fopen(filename, "r");
if (!fin) {
printf("Error opening the file %s.\n", filename);
return 1;
}
while (fgets(buf, sizeof buf, fin)) {
char *p = buf;
char *song = get_field(&p);
char *artist = get_field(&p);
char *year = get_field(&p);
if (!strcmp(year, target_year)) {
printf("%s\n", song);
}
}
fclose(fin);
return 0;
}
regarding: scanf("%[^,]") this consumes (upto but not including) the comma.
So the next instruction needs to be something like getchar() to consume the comma. Otherwise, on the next loop nothing will be read because the first character in stdin is that same comma.

Find number of occurrences for the substring in a string using C programming

I am trying a program in c to read a text file that contains array of characters or a string and find the number of occurrences of the substring called "GLROX" and say sequence found when it is found. And the "inputGLORX.txt" contains following string inside it.
GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX
But i am getting wierd results. It would be great if some expert in C-programming helps me to solve this and thanks in advance.
#include <stdio.h>
#include <conio.h>
#include <string.h>
#define NUMBER_OF_STRINGS 40
#define MAX_STRING_SIZE 7
void seqFound()
{
printf("Sequence Found\n");
}
int main()
{
FILE *fp;
char buff[1000];
char strptrArr[NUMBER_OF_STRINGS] [MAX_STRING_SIZE];
const char *search = "GLROX";
fp = fopen("D:/CandC++/inputGLORX.txt", "r");
if(fp==NULL)
printf("It is a null pointer");
while(!feof(fp))
{
//fscanf(fp, "%s", buff);
fgets(buff, 1000,fp);
}
int len = strlen(buff);
printf("length is %d\n",len);
int count = 0;
char *store;
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++;
}
printf("count is %d\n",count);
while (count!=0) {
seqFound();
count--;
}
return 0;
}
As said in the comment, their are at least 2 problems in the code: your fgets will only fetch the last line (if it fetch one at all ? In any case, this is not what you want), and you are incrementing the search string instead of the buff string.
Something like this should fix most of your problems, as long as no lines in your file are longer than 999 characters. This will not work properly if you use the \n or NULL characters in your search string.
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
Here is a main for testing. I used argv to provide the input.txt and the search string.
#include <stdio.h>
#include <string.h>
int main(int argc, char **argv)
{
FILE *fp;
char buff[1000];
char *search;
if (argc < 3)
return (-1);
search = argv[2];
if (search[0] == '\0')
return (-1);
if ((fp = fopen(argv[1], "r")) == NULL)
return (-1);
int count = 0;
while (fgets(buff, 1000, fp) != NULL)
{
char *temp = buff;
while ((temp = strstr(temp, search)))
{
printf("%d. %s\n", count + 1, temp);
count++;
temp++;
}
}
printf("Match found: %d\n", count);
return 0;
}
The way you search in buff is wrong, i.e. this code:
while(store = strstr(buff, search))
{
printf("substring is %s \n",store);
count++;
search++; // <------- ups
}
When you have a hit, you change search, i.e. the string you are looking for. That's not what you want. The search string (aka the needle) shall be the same all the time. Instead you want to move forward in the buffer buff so that you can search in the remainder of the buffer.
That could be something like:
int main()
{
const char* buff = "GLAAAROBBBBBBXGLROXGLROXGLROXGLROXGLCCCCCCCCCCCCCCROXGGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROXGLROX";
const char* search = "GLROX";
const char* remBuff = buff; // Pointer to the remainder of buff
// Initialized to be the whole buffer
const char* hit;
int cnt = 0;
while((hit = strstr(remBuff, search))) // Search in the remainder of buff
{
++cnt;
remBuff = hit + 1; // Update the remainder pointer so it points just 1 char
// after the current hit
}
printf("Found substring %d times\n", cnt);
return 0;
}
Output:
Found substring 15 times

Compare each line from two different files and print the lines that are different in C

Supposing that I have two files like this:
file1.txt
john
is
the new
guy
file2.txt
man
the old
is
rick
cat
dog
I'd like to compare first line from file1 with all the lines from file2 and verify if it exist. If not, go two the second line from file1 and compare it with all the lines from file2.. and so on until eof is reached by file1.
The output that I expect is:
john
the new
guy
How I thought this should be done:
read file1 and file2
create a function which returns the line number of each of them
take the first line from file1 and compare it to all the lines from file2
do this until all the lines from file1 are wasted
Now, I don't know what I'm doing wrong, but I don't get the result that I expect:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
Could someone please point me into the right direction ? For testing purposes I created a counter at the end which was a part of a small debug. There should be the print() function
As per #chux answer I got the following simplified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
The above code is giving me the following output, which is not what is expected:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
Problems with OP's code
Imprecise definition of line.
Excessive recalculation
Fuzzy determination of the number of lines in a file.
Unlike string, which has a precise definition in C, reading a line is not so well defined. The primary specificity issue: does a line contain the trailing '\n'. If the first answer is Yes, then does the last text in a file after a '\n' constitute a line? (Excessively long lines are another issue, but let us not deal with that today.)
Thus possibly some lines end with '\n' and others do not, fooling strcmp("dog", "dog\n").
The easiest solution is to read a line until either 1) a '\n' is encountered, 2) EOF occurs or 3) line buffer is full. Then after getting a line, lop off the potential trailing '\n'.
Now all lines code subsequently works with have no '\n'.
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP's loop is incredible wasteful. Consider a file with 1000 lines. Code will loop, calling 1000 times countlines() (each countlines() call reads 1000 lines) times when one countlines() call would suffice.
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
There really is no need to count the line anyways, just continue until EOF (fgets() returns NULL). So no need to fix its fuzzy definition. (fuzzy-ness concerns same issues as #1)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
Other simplifications possible - for another day.
FWIW, following counts lines of text allowing the last line in the file to optionally end with a '\n'.
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
Note that this function may get a different result that fgets() calls. Consider a file of one line of 150 characters. fgets(..., 100,...) will report 2 lines. FileLineCount() reports 1.
[Edit] Updated code to conform to OP functionality.
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
This program prints the diff of two files file1.txt and file2.txt.
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
You already have a very good answer (and always will from chux), but here is a slightly different approach to the problem. It uses automatic storage to reading file2 into an array of strings and then compares each line in file1 against every line in file2 to determine whether it is unique. You can easily convert the code to dynamically allocate memory, but for sake of complexity that was omitted:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
Look over the code and let me know if you have any questions.
Example Use/Output
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.

C: fgets return an extra blank line at beginning

I am trying to read a file and reverse each line of it and display it to standard output in C.
my file is:
$ cat f1
this is line 1
this is line 2
this is line 3
My code is:
#include <stdio.h>
#include <string.h>
void reverse_string(char *s) {
int l = strlen(s);
int i;
char sr[l];
int j = 0;
for (i = l - 1; i >= 0; i--) {
sr[j] = s[i];
j++;
}
printf("%s", sr);
}
int main(int argc, char *argv[]) {
FILE *fp;
int i;
char filename[128];
char line[100];
for (i = 1; i < argc; i++) {
strcpy(filename, argv[i]);
}
fp = fopen(filename, "r");
while (fgets(line, 128, fp) != NULL) {
if (strlen(line) != 0) {
reverse_string(line);
}
}
printf("\n");
}
Output is:
$ ./mycode f1
(blank line here)
1 enil si siht
2 enil si siht
3 enil si siht
I am confused, why is there a extra blank line being output by my while loop even though there is no blank line in the file.
fgets includes a '/n' before nul char.
So when you reverse a string, the first thing you do is print it.
My ugly fix is:
replace the fist printf with
printf("%s\n", sr+1);
remove the second one.
And remember, when all fails, read the manual.:
fgets() reads in at most one less than size characters from stream and
stores them into the buffer pointed to by s.
Reading stops after an EOF or a newline. If a newline is read, it is stored into the buffer.
A terminating null byte ('\0') is stored after the last character in the buffer.
I am also trying to read from the file. My file contains the user's name and after reading username I need to add a constant character to each user. But when I am reading the file, I am getting unwanted empty lines in output and constant character is showing at empty line not together with the user name.
int main()
{
char buffer[20] ;
char i, j;
const char *a="love";
FILE *fp;
if ((fp = fopen("user.txt","r")) == NULL)
{
perror ("Error opening file");
}
while ( !feof(fp))
{
// read in the line and make sure it was successful
if (fgets(buffer,20,fp) != NULL)
{
cout << buffer << endl;
for(i = 0; buffer[i] != '\0'; ++i);
for(j = 0; a[j] != '\0'; ++j, ++i)
{
buffer[i] = a[j];
}
buffer[i] = '\0' ;
//cout << buffer << endl ;
}
}
}
I am getting output like this
asharma5
love
as23w
love
qwssdd
love
My desired output is adding love to each user name.
asharma5love
as23wlove
qwssddlove

Easiest way to read this line of text into a struct?

I have a text file with data in the form:
Lee AUS 2 103 2 62 TRUE
Check AUS 4 48 0 23 FALSE
Mills AUS 8 236 0 69 FALSE
I need to each line into a struct like, however I'd like to avoid using fixed length arrays (the problem with fgets as far as I can tell):
struct Data
{
char *sname;
char *country;
int *a;
int *b;
int *c;
int *d;
char *hsisno;
};
I am very new to C. Should I use fscanf, or fgets?
fscanf stands for "file scan formatted" and user data is about as unformatted as you can get.
You should never use naked "%s" format strings on data where you don't have absolute control over what can be read.
The best solution is to use fgets to read a line since this allows you to prevent buffer overflow.
Then, once you know the size of your line, that's the maximum size of each string that you will require. Use sscanf to your heart's content to get the actual fields.
One final thing. It's probably a bit wasteful having int* types for the integers, since you know they have a specific maximum size already. I'd use the non-pointer variant, something like:
struct Data {
char *sname; char *country;
int a; int b; int c; int d;
char *hsisno;
};
By way of example, here's some safe code:
#include <stdio.h>
#include <string.h>
// Here's all the stuff for a linked list of your nodes.
typedef struct sData {
char *sname; char *country; char *hsisno;
int a; int b; int c; int d;
struct sData *next;
} Data;
Data *first = NULL; Data *last = NULL;
#define MAXSZ 100
int main (void) {
char line[MAXSZ], sname[MAXSZ], country[MAXSZ], hsisno[MAXSZ];
int a, b, c, d;
FILE *fIn;
Data *node;
// Open the input file.
fIn = fopen ("file.in", "r");
if (fIn == NULL) {
printf ("Cannot open file\n");
return 1;
}
// Process every line.
while (fgets (line, sizeof(line), fIn) != NULL) {
// Check line for various problems (too short, too long).
if (line[0] == '\0') {
printf ("Line too short\n");
return 1;
}
if (line[strlen (line)-1] != '\n') {
printf ("Line starting with '%s' is too long\n", line);
return 1;
}
line[strlen (line)-1] = '\0';
// Scan the individual fields.
if (sscanf (line, "%s %s %d %d %d %d %s",
sname, country, &a, &b, &c, &d, hsisno) != 7)
{
printf ("Line '%s' didn't scan properly\n", line);
return 1;
}
// Allocate a new node to hold data.
node = malloc (sizeof (Data));
if (node == NULL) {
printf ("Ran out of memory\n");
return 1;
}
node->sname = strdup (sname);
node->country = strdup (country);
node->a = a;
node->b = b;
node->c = c;
node->d = d;
node->hsisno = strdup (hsisno);
node->next = NULL;
if (first != NULL) {
last->next = node;
last = node;
} else {
first = node;
last = node;
}
}
fclose (fIn);
// Output the list for debugging.
node = first;
while (node != NULL) {
printf ("'%s' '%s' %d %d %d %d '%s'\n",
node->sname, node->country, node->a, node->b,
node->c, node->d, node->hsisno);
node = node->next;
}
return 0;
}
which reads in your file and stores it in a linked list. It outputs:
'Lee' 'AUS' 2 103 2 62 'TRUE'
'Check' 'AUS' 4 48 0 23 'FALSE'
'Mills' 'AUS' 8 236 0 69 'FALSE'
at the end, as expected.
I've done a whole series of answers on the pitfalls of using *scanf functions on non-controlled data (enter user:14860 fgets into the search box above), some of which (here, here and here, for example) include a perennial favourite function of mine, getLine, for safer user input.

Resources