C - Parsing an input file into lines and characters - c

I am trying to write a C program to parse an input file so that individual lines are parsed, and in each line, the individual characters are then parsed further and stored in different variables in a struct. Here is my code so far (I have managed to parse individual characters without considering which line they are on):
/* create struct instances */
/* file open code */
...
/* Pull whitespace-delimited tokens from fp one at a time and echo each on
   its own line; the loop stops as soon as fscanf reports EOF. */
int currentChar;
while ((currentChar = fscanf(fp, "%s", storageArray)) != EOF) {
    printf("%s\n", storageArray);
}
...
/* file close code */
How can I adapt my code so that, instead of having each individual character print to the screen, I get behaviour like the following: (Note: in my program I assume the user input will have three characters to a line.)
INPUT FILE:
a b c
f e d
LINE STRUCT 1:
char1 = a
char2 = b
char3 = c
LINE STRUCT 2:
char1 = f
char2 = e
char3 = d
I feel like the solution might involve nested loops similar to the while I have written, where the outer one keeps track of lines and the inner one keeps track of characters.

Or try this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define READ_OPTIONS "r"
/* One parsed input line: its three characters plus the link to the next line. */
struct line {
    char char1, char2, char3;
    struct line *next;
};

/* Head of the singly linked list of parsed lines; NULL while the list is empty. */
struct line *g_lines = NULL;
/*
 * Reads "file.txt" line by line, stores the three characters of every
 * well-formed line in a node appended to g_lines, prints all nodes and then
 * frees the list.
 *
 * Returns 0 on success and -1 when the file cannot be opened.
 */
int main(int argc, char** argv) {
    char buf[8] = {0};
    struct line *node, *iter, *head;
    struct line *tail = NULL;   /* last node, so appending needs no list walk */
    int counter = 1;
    FILE* fp = fopen("file.txt", READ_OPTIONS);

    (void)argc;
    (void)argv;

    if (NULL == fp)
        return -1;

    while (fgets(buf, sizeof buf, fp)) {
        /* A chunk without '\n' means the line did not fit in buf: drop the
           rest of it so the remainder is not parsed as a separate record. */
        if (NULL == strchr(buf, '\n')) {
            int c;
            while ((c = fgetc(fp)) != EOF && c != '\n')
                ;
        }

        node = malloc(sizeof *node);
        if (NULL == node)
            continue;
        memset(node, 0, sizeof *node);

        /* Keep only lines that really hold three characters; blank or short
           lines would otherwise become nodes padded with NUL bytes. */
        if (sscanf(buf, " %c %c %c",
                   &node->char1, &node->char2, &node->char3) != 3) {
            free(node);
            continue;
        }

        if (NULL != tail)
            tail->next = node;
        else
            g_lines = node;
        tail = node;
    }
    fclose(fp);

    /* print the contents */
    for (iter = g_lines; NULL != iter; iter = iter->next, ++counter)
        printf("Line %d: char1=%c char2=%c char3=%c\n",
               counter, iter->char1, iter->char2, iter->char3);

    /* release every node before returning control to the operating system */
    for (iter = g_lines; NULL != iter; iter = head) {
        head = iter->next;
        free(iter);
    }
    g_lines = NULL;
    return 0;
}

Try This
/* Walk the file one character at a time: '\n' starts the next line struct,
   blanks between the characters are skipped, and every other character is
   reported with its 1-based position inside the current line. */
int ch, i = 1, j = 1;   /* ch is an int so that EOF stays distinguishable */
printf("Line STRUCT 1: ");
while( ( ch = fgetc(fp) ) != EOF ){
    if(ch=='\n'){
        printf("Line STRUCT %d \n",++i);
        j=1;
        continue;       /* the newline itself is not a data character */
    }
    if(ch==' ' || ch=='\t')
        continue;       /* separators between the characters */
    /* arguments follow the format: position first, then the character */
    printf("Char %d = %c \n",j++,ch);
}

Do this:
for (int i=0; !feof(fp); i++)
fscanf(fp,"%c %c %c ",
&storageArray[i].char1,
&storageArray[i].char2,
&storageArray[i].char3);

Related

Problem while opening a file in C on linux

I'm having problems with my C program which works perfectly on Windows but not on Linux. I use the following method for reading line by line a file:
/*
 * Reads the next line from fp into a freshly allocated, NUL-terminated
 * buffer that the caller must free().
 *
 * fp                - stream to read from; NULL yields a NULL result.
 * typicalSize       - initial buffer capacity in bytes (0 is treated as 1).
 * endOfLineDetected - set to 1 when EOF is hit while reading this line and
 *                     left untouched otherwise (despite the name it acts as
 *                     an end-of-file flag).
 * nrOfCharRead      - receives the number of bytes stored INCLUDING the
 *                     terminating '\0'; 0 when NULL is returned.
 *
 * The '\n' is consumed but not stored.  Returns NULL if memory runs out.
 */
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead){
    char *line;                                   // buffer for the string
    int ch;                                       // int so EOF is distinguishable
    size_t len = 0;                               // characters stored so far
    size_t lineSize = typicalSize ? typicalSize : 1; // a zero-byte buffer could not even hold '\0'

    *nrOfCharRead = 0;
    if(!fp) return NULL;

    line = malloc(lineSize);
    if (!line) return NULL;

    for (;;) {
        ch = fgetc(fp);
        if (ch == '\n') break;                    // end of line
        if (ch == EOF) {                          // end of file
            *endOfLineDetected = 1;
            break;
        }
        line[len++] = (char)ch;
        if (len == lineSize){                     // buffer full: grow before the next store
            // keep the old pointer until realloc succeeds, otherwise it leaks
            char *grown = realloc(line, lineSize + 64);
            if (!grown) {
                free(line);
                return NULL;
            }
            line = grown;
            lineSize += 64;
        }
    }
    line[len++] = '\0';                           // there is always room: len < lineSize here
    *nrOfCharRead = len;
    return line;
}
The workflow of my program is the following: I pass a path as input; the file at that path contains, on each line, the path of another file. I read those paths with the function above and store them in a structure. I then apply the KMP algorithm to each of those files to find the occurrences of a string.
The problem comes in my program when I try to open the files that correspond to the paths I saved earlier:
/* Open the stored path for reading; report the path and abort on failure. */
FILE *fp = fopen(list->path, "r");
if(!fp){
    fprintf(stderr, "Cannot open %s, exiting. . .\n", list->path);
    exit(1);
}
On the screen is displayed:
, exiting ...
This is strange, because if the problem were in opening the file, the output should be:
Cannot open "list->path content", exiting. . .
Even though I don't know why it gives me this error while opening the path read from the input file. During compiling there's no problem. I was thinking about buffer problems derived by the function "getLineOfAnySize. I'm not a Linux user, I was just trying to run the program in order to make sure it will run on both OS. Don't think about design issues or logical issues because on Windows everything works perfectly. Big up to everyone who will help me! Please ask further information about the code if needed.
EDIT:
The content of the input file is:
/home/xxx/Scrivania/find/try
/home/xxx/Scrivania/find/try1
Note that find is the directory of the project.
The following is a sample of my program in order to make more sense of variable and construct:
foo.c :
#include "foo.h"
/* Streams: the file listing the paths (fInput) and the file being searched (fp). */
FILE *fInput = NULL, *fp = NULL;
/* Most recent line read from fInput (line1) and from fp (line2). */
char *line1, *line2;
/* Out-parameters handed to every getLineOfAnySize() call made by main(). */
int endOfLineDetected = 0;
size_t nrOfCharRead = 0;
/* Answer to the "insert another word?" prompt. */
char ch;
/* Word list: cursor, head and tail. */
fWord *w = NULL, *wordHead = NULL, *wordTail = NULL;
/* Path list read from the input file: cursor, head and tail. */
fList *list = NULL, *listHead = NULL, *listTail = NULL;
/* Per-word list of searched files: head and tail. */
fPath *pathHead = NULL, *pathTail = NULL;
/* positionHead is a scratch pointer used while printing; head/current are
   the first and last node of the match list KMP() is currently building. */
fPosition *positionHead = NULL, *head = NULL, *current = NULL;
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int *endOfLineDetected,size_t *nrOfCharRead);
/*
 * Entry point.  argv[1] names a text file holding one path per line.  The
 * user is prompted for one or more words; every listed file is then read
 * line by line and searched with KMP().  For each word the total count, the
 * per-file count and every "line character" position are printed.
 *
 * Exits with status 1 when the argument is missing, a file cannot be opened
 * or memory runs out; returns 0 otherwise.
 */
int main(int argc, char *argv[]){
    if(argc < 2){
        fprintf(stderr, "Usage: %s <file-with-paths>\n", argc > 0 ? argv[0] : "foo");
        exit(1);
    }
    fInput = fopen(argv[1], "r"); // the file that lists the paths of the files to search
    if(fInput == NULL){
        fprintf(stderr, "Cannot open %s, exiting. . .\n", argv[1]);
        exit(1);
    }
    while(!endOfLineDetected){ // read the input file line by line and store each path
        line1 = getLineOfAnySize(fInput,128,&endOfLineDetected,&nrOfCharRead);
        if(line1 == NULL){
            fprintf(stderr, "Out of memory, exiting. . .\n");
            exit(1);
        }
        // A file written on Windows ends its lines with "\r\n"; the '\r' is
        // not part of the path and would make fopen() fail on Linux.
        size_t pathLen = strlen(line1);
        if(pathLen > 0 && line1[pathLen - 1] == '\r'){
            line1[--pathLen] = '\0';
        }
        // The newline that ends the last path makes the final read return an
        // empty string; storing it would later end in "Cannot open , exiting".
        if(pathLen == 0){
            free(line1);
            line1 = NULL;
            continue;
        }
        fList *node = malloc (sizeof(fList));
        if(node == NULL){
            fprintf(stderr, "Out of memory, exiting. . .\n");
            exit(1);
        }
        node->path = line1;
        node->next = NULL;
        if(listHead == NULL){
            listHead = listTail = node;
        }else{
            listTail = listTail->next = node;
        }
    }
    list = listHead;
    fclose(fInput);
    do{
        fWord *app = malloc(sizeof(fWord));
        if(app == NULL){
            fprintf(stderr, "Out of memory, exiting. . .\n");
            exit(1);
        }
        printf("Insert the word to search: ");
        if(scanf("%49s", app->word) != 1){ // word[] holds 49 characters plus '\0'
            free(app);
            break;
        }
        app->totalOccurences = 0;
        app->p = NULL;
        app->next = NULL;
        if(wordHead == NULL){
            wordTail = wordHead = app;
        }else{
            wordTail = wordTail->next = app;
        }
        printf("Do you want to insert another word? (Y/N): ");
        if(scanf(" %c", &ch) != 1){
            break;
        }
    }while(ch == 'y' || ch == 'Y');
    w = wordHead;
    while(w != NULL){
        while(list != NULL){
            w->p = malloc(sizeof(fPath));
            if(w->p == NULL){
                fprintf(stderr, "Out of memory, exiting. . .\n");
                exit(1);
            }
            w->p->fileOccurrences = 0;
            w->p->path = list->path; // shared with the fList node, not copied
            w->p->position = NULL;
            w->p->next = NULL;
            if(pathHead == NULL){
                pathTail = pathHead = w->p;
            }else{
                pathTail = pathTail->next = w->p;
            }
            fp = fopen(w->p->path, "r");
            if(fp == NULL){
                fprintf(stderr, "Cannot open %s, exiting. . .\n", w->p->path);
                exit(1);
            }
            int countLine = 0;
            endOfLineDetected = 0;
            while(!endOfLineDetected){
                line2 = getLineOfAnySize(fp,128,&endOfLineDetected,&nrOfCharRead);
                if(line2 == NULL){
                    fprintf(stderr, "Out of memory, exiting. . .\n");
                    exit(1);
                }
                int n = strlen(line2);
                int m = strlen(w->word);
                w->p->fileOccurrences = w->p->fileOccurrences + KMP(line2, w->word, n, m, countLine, w->p);
                countLine = countLine + 1;
                free(line2); // KMP() keeps positions only, never the text
                line2 = NULL;
            }
            w->totalOccurences = w->totalOccurences + w->p->fileOccurrences;
            w->p->position = getHead(); // take over the matches collected by KMP()
            w->p = w->p->next;
            list = list->next;
            fclose(fp);
        }
        w->p = pathHead; // rewind this word's file list to its first entry
        list = listHead;
        w = w->next;
        pathHead = NULL;
    }
    w = wordHead;
    while(w != NULL){
        printf("WORD %s \r\n", w->word);
        printf("TOTAL %d \r\n", w->totalOccurences);
        pathHead = w->p;
        while(w->p != NULL){
            printf("FILE %s \r\n", w->p->path);
            printf("OCCURENCES %d \r\n", w->p->fileOccurrences);
            positionHead = w->p->position;
            while (w->p->position != NULL){
                printf("%d %d\r\n", w->p->position->line, w->p->position->character);
                w->p->position = w->p->position->next;
            }
            w->p->position = positionHead; // restore the cursor moved while printing
            w->p = w->p->next;
        }
        w->p = pathHead;
        w = w->next;
    }
    w = wordHead;
    printf("\r\n");
    freeMemory();
    freeKMP();
    return 0;
}
/*
 * Reads the next line from fp into a freshly allocated, NUL-terminated
 * buffer that the caller must free().
 *
 * fp                - stream to read from; NULL yields a NULL result.
 * typicalSize       - initial buffer capacity in bytes (0 is treated as 1).
 * endOfLineDetected - set to 1 when EOF is hit while reading this line and
 *                     left untouched otherwise (despite the name it acts as
 *                     an end-of-file flag).
 * nrOfCharRead      - receives the number of bytes stored INCLUDING the
 *                     terminating '\0'; 0 when NULL is returned.
 *
 * The '\n' is consumed but not stored.  Returns NULL if memory runs out.
 */
char * getLineOfAnySize(FILE* fp, size_t typicalSize, int
*endOfLineDetected,size_t *nrOfCharRead){
    char *line;                                   // buffer for the string
    int ch;                                       // int so EOF is distinguishable
    size_t len = 0;                               // characters stored so far
    size_t lineSize = typicalSize ? typicalSize : 1; // a zero-byte buffer could not even hold '\0'

    *nrOfCharRead = 0;
    if(!fp) return NULL;

    line = malloc(lineSize);
    if (!line) return NULL;

    for (;;) {
        ch = fgetc(fp);
        if (ch == '\n') break;                    // end of line
        if (ch == EOF) {                          // end of file
            *endOfLineDetected = 1;
            break;
        }
        line[len++] = (char)ch;
        if (len == lineSize){                     // buffer full: grow before the next store
            // keep the old pointer until realloc succeeds, otherwise it leaks
            char *grown = realloc(line, lineSize + 64);
            if (!grown) {
                free(line);
                return NULL;
            }
            line = grown;
            lineSize += 64;
        }
    }
    line[len++] = '\0';                           // there is always room: len < lineSize here
    *nrOfCharRead = len;
    return line;
}
// Function to implement KMP algorithm
/*
 * Knuth-Morris-Pratt search.  Counts the occurrences of pattern Y (length n)
 * inside text X (length m).  For every match a new fPosition holding `line`
 * and the 0-based start index is appended to the list tracked by the globals
 * head/current, and app->position is pointed at that newest node.
 *
 * Returns the number of matches recorded (0 for an empty pattern or a
 * pattern longer than the text).  Stops early if memory runs out.
 */
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app){
    int count = 0;
    // Nothing can match here; bailing out also keeps the array below at a
    // positive size.
    if (n <= 0 || m < n)
        return 0;
    // next[i] stores the index of next best partial match
    int next[n + 1];
    for (int i = 0; i < n + 1; i++)
        next[i] = 0;
    for (int i = 1; i < n; i++){
        // Start from the border of the prefix that ends just before Y[i].
        // Reading next[i + 1] here would always give 0 (it has not been
        // filled in yet) and lose every border longer than one character.
        int j = next[i];
        while (j > 0 && Y[j] != Y[i])
            j = next[j];
        if (j > 0 || Y[j] == Y[i])
            next[i + 1] = j + 1;
    }
    for (int i = 0, j = 0; i < m; i++){
        if(X[i] == Y[j]){
            if (++j == n){
                fPosition *node = malloc (sizeof(fPosition));
                if (node == NULL)
                    return count;
                count = count + 1; // one more occurrence of the word in this line
                node->line = line;
                node->character = i - j + 1;
                node->next = NULL;
                if(head == NULL){
                    current = head = node;
                }else{
                    current = current->next = node;
                }
                app->position = current;
                j = next[j]; // continue from the longest border so overlapping matches are found
            }
        }
        else if (j > 0) {
            j = next[j];
            i--; // since i will be incremented in next iteration
        }
    }
    return count;
}
/* Hands the match list built so far to the caller: returns its first node
   and clears head so that the next KMP() call starts a new list. */
fPosition * getHead(){
    fPosition *detached = head;
    head = NULL;
    return detached;
}
void freeKMP(){
free(head);
free(current);
}
void freeMemory(){
list = listHead;
fList *tempL = NULL;
while(list != NULL){
tempL = list;
list = list->next;
free(tempL);
}
w = wordHead;
fWord *tempW = NULL;
fPath *tempP = NULL;
fPosition *tempO = NULL;
while(w != NULL){
while(w->p != NULL){
while(w->p->position != NULL){
tempO = w->p->position;
w->p->position = w->p->position->next;
free(tempO);
}
tempP = w->p;
w->p = w->p->next;
free(tempP);
}
tempW = w;
w = w->next;
free(tempW);
}
free(w);
free(line1);
free(line2);
free(wordHead);
free(wordTail);
free(listHead);
free(listTail);
free(pathHead);
free(pathTail);
free(positionHead);
}
foo.h:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Node of the list of file paths read from the input file. */
typedef struct fileList{
    char *path;
    struct fileList *next;
} fList;

/* One match: the line counter value and the start index inside that line. */
typedef struct OccurrencesPosition{
    int line;
    int character;
    struct OccurrencesPosition *next;
} fPosition;

/* One searched file for a given word: its path, match count and matches. */
typedef struct filePath{
    char *path;
    int fileOccurrences;
    struct OccurrencesPosition *position;
    struct filePath *next;
} fPath;

/* One search word with its total match count and its list of searched files. */
typedef struct fileWord{
    char word[50];
    int totalOccurences;
    struct filePath *p;
    struct fileWord *next;
} fWord;

fPosition * getHead();
int KMP(const char* X, const char* Y, int m, int n, int line, fPath *app);
void freeMemory();
void freeKMP();
Maybe also the way I free memory isn't correct.
This is not a full answer, but a hint for further analysis.
I tested the program with the input file contents as shown in the question and entered one or two words.
If the first file does not exist, I get an error message as expected:
Cannot open /home/yuripaoloni/Scrivania/find/try, exiting. . .
Then I modified the input file to list two files that exist on my system and get an error message
Cannot open , exiting. . .
I extended the code that tries to open the file to get more output:
/* Open the current path and report the outcome either way; on failure also
   print the system's reason via perror before exiting. */
fp = fopen(w->p->path, "r");
if(fp != NULL){
    printf("Successfully opened %s\n", w->p->path);
} else {
    fprintf(stderr, "Cannot open %s, exiting. . .\n", w->p->path);
    perror("fopen");
    exit(1);
}
This prints
$ ./foo input
Insert the word to search: foo
Do you want to insert another word? (Y/N): y
Insert the word to search: bar
Do you want to insert another word? (Y/N): y
Insert the word to search: baz
Do you want to insert another word? (Y/N): n
Successfully opened /home/username/tmp/try
Successfully opened /home/username/tmp/try1
Cannot open , exiting. . .
fopen: No such file or directory
Apparently your program tries to open a third file after the existing file names. w->p->path might be a NULL pointer or may point to an empty string.
The same error occurs when I enter only one word. I did not further analyze the error.
To find out why your program tries to open a file with an empty name, you can run it in a debugger or add more output to see how many loop cycles are executed when processing the lists and which data you find.

Counting Words from Text File into Linked List in C

I am new to programming in C and for an assignment for one of my classes, we are to write code that would read in a text file, put all the words in a linked list with a counter, and output that linked list into a text file.
I'm not sure what I am doing wrong in my below code, it only counts some of the words and seems to skip others. (I added many extra prints to try and see where the words are lost)
I then attempted to make all the words lowercase when they were added to the linked list, but that seemed to make it read even less words and I have no clue why.
Any help/suggestions would be greatly appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Node of the word list: the word text, how often it occurred, and the link
   to the next node. */
typedef struct s_words words;
struct s_words {
    char *str;            /* the word */
    int count;            /* number of occurrences */
    struct s_words *next; /* next node, or NULL */
};
/*
 * Allocates a list node that holds its own copy of `word`, with count 1 and
 * no successor.  The caller owns the node and its str.
 *
 * Returns NULL, with nothing left allocated, if either allocation fails.
 */
words* create_words(char* word) {
    size_t len = strlen(word);
    printf("%zu ", len);               /* %zu is the conversion for size_t */
    words* newWord = malloc(sizeof *newWord);
    if (NULL == newWord)
        return NULL;
    newWord->str = malloc(len + 1);
    if (NULL == newWord->str)
    {
        free(newWord);                 /* do not hand out a node without text */
        return NULL;
    }
    memcpy(newWord->str, word, len + 1); /* copies the terminating '\0' too */
    printf(" Create: %s ", newWord->str);
    newWord->count = 1;
    newWord->next = NULL;
    return newWord;
}
/*
 * Counts one occurrence of `word`.  If a node with the same text exists its
 * count is incremented and the unchanged head is returned.  Otherwise a new
 * node is created in front of the list and returned as the new head; the
 * result is NULL when that node could not be created.
 */
words* add_word(words* wordList, char* word)
{
    words *cursor;

    for (cursor = wordList; cursor != NULL; cursor = cursor->next)
    {
        if (strcmp(cursor->str, word) != 0)
            continue;
        cursor->count += 1;
        return wordList;
    }

    /* not seen before: the new node becomes the head */
    words *fresh = create_words(word);
    if (fresh == NULL)
        return NULL;
    fresh->next = wordList;
    printf(" NEW WORD: %s\n ", fresh->str);
    return fresh;
}
/*
 * Usage: prog inputfile outputfile
 *
 * Splits the input file into words at '.', ' ', ',', ':', ';' and '\n',
 * lowercases them, counts them with add_word() and writes "word count"
 * lines to the output file.  Returns 0 on success and 1 on a usage error,
 * an open failure or when memory runs out.
 */
int main(int argc, char* argv[])
{
    words *mywords = NULL; /* head of the linked list containing the words */
    FILE *myFile;
    FILE *myOutput;

    if (argc < 3)
    {
        fprintf(stderr, "usage: %s inputfile outputfile\n",
                argc > 0 ? argv[0] : "wordcount");
        return 1;
    }
    char* filename = argv[1];
    char* outputfile = argv[2];

    myFile = fopen(filename, "r"); /* first parameter is the input file */
    if (myFile==0)
    {
        printf("file not opened\n");
        return 1;
    }
    printf("file opened \n");

    /* read character by character; k is the length of the word being built */
    int ch = 0, k = 0;
    char thisword[100];
    while ((ch = fgetc(myFile)) != EOF )
    {
        if (ch == '.' || ch == ' ' || ch == ',' || ch == ':' || ch == ';' || ch == '\n')
        {
            /* k > 0 means at least one letter was collected, so runs of
               delimiters (or a leading one) never add an empty word */
            if (k > 0)
            {
                thisword[k] = '\0';
                printf(" Add:%s ", thisword);
                words *updated = add_word(mywords, thisword);
                if (updated == NULL)
                {
                    /* keep the old head: overwriting it would lose the list */
                    fprintf(stderr, "out of memory\n");
                    fclose(myFile);
                    return 1;
                }
                mywords = updated;
                printf(" Added:%s\n", mywords->str);
                k=0;
            }
        }
        else if (k < (int)sizeof(thisword) - 1) /* leave room for '\0' */
        {
            thisword[k] = (char)tolower(ch);
            putchar(thisword[k]); /* debug echo; its result is not stored */
            k++;
        }
    }
    if (k > 0) /* the file did not end with a delimiter */
    {
        thisword[k] = '\0';
        printf("Last Word:%s ", thisword);
        words *updated = add_word(mywords, thisword);
        if (updated != NULL)
            mywords = updated;
    }
    fclose(myFile);

    printf("printing list\n");
    myOutput = fopen(outputfile, "w+"); /* second parameter is the output file */
    if (myOutput == 0)
    {
        printf("output file not opened \n");
        return 1;
    }
    printf("output file opened \n");

    /* Visit every node.  Testing currword itself (not currword->next) also
       prints the last node and copes with an empty list. */
    words *currword = mywords;
    while (currword != NULL)
    {
        fprintf(myOutput, "%s %d \n", currword->str, currword->count);
        printf("%s ", currword->str);
        currword = currword->next;
    }
    fclose(myOutput);

    /* release the list */
    while (mywords != NULL)
    {
        words *following = mywords->next;
        free(mywords->str);
        free(mywords);
        mywords = following;
    }
    return 0;
}
You sure like making things hard on yourself... Your four biggest issues were (1) not passing the address of mywords to add_word, (2) failing to handle the new/empty list case in add_word, (3) adding the new nodes to the head of the list, and (4) overwriting your list address every time you called add_word (e.g. mywords = add_word...).
Fixing each of those problems and tidying up parsing a bit, you should be able to find all of your words in your list. Look over/test the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Node of the word list: the word text, how often it occurred, and the link
   to the next node. */
typedef struct s_words words;
struct s_words {
    char *str;            /* the word */
    int count;            /* number of occurrences */
    struct s_words *next; /* next node, or NULL */
};
/*
 * Allocates a list node that holds its own copy of `word`, with count 1 and
 * no successor.  The caller owns the node and its str.
 *
 * Returns NULL, with nothing left allocated, if either allocation fails.
 */
words *create_words (char *word)
{
    size_t len = strlen (word);
    printf ("%zu ", len);                   /* %zu is the conversion for size_t */
    words *newWord = malloc (sizeof *newWord);
    if (NULL == newWord)
        return NULL;
    newWord->str = malloc (len + 1);
    if (NULL == newWord->str) {
        free (newWord);                     /* do not hand out a node without text */
        return NULL;
    }
    memcpy (newWord->str, word, len + 1);   /* copies the terminating '\0' too */
    printf (" Create: %s ", newWord->str);
    newWord->count = 1;
    newWord->next = NULL;
    return newWord;
}
/*
 * Counts one occurrence of `word` in the list whose head is *wordList.
 *
 * - Empty list: creates the first node, stores it in *wordList, returns it.
 * - Word already present: increments its count and returns the head.
 * - Otherwise: appends a new node at the tail and returns that node.
 *
 * Returns NULL when a node had to be created and that failed; the list is
 * left unchanged in that case.
 */
words *add_word (words **wordList, char *word)
{
    if (!*wordList) {       /* handle EMPTY list */
        printf ("NEW LIST\n");
        return *wordList = create_words (word);
    }
    words *temp = *wordList;
    /* Compare EVERY node, the tail included.  Looping on temp->next alone
       would skip the tail and append a duplicate of a word stored there. */
    for (;;) {
        if (strcmp (temp->str, word) == 0) {
            temp->count = temp->count + 1;
            return *wordList;
        }
        if (temp->next == NULL)
            break;          /* temp is the tail: append after it */
        temp = temp->next;
    }
    words *newWord = create_words (word);
    if (NULL != newWord) {  /* insert at TAIL of list */
        temp->next = newWord;
        printf (" NEW WORD: %s\n ", newWord->str);
    }
    return newWord;
}
/*
 * Usage: prog ifile ofile
 *
 * Splits ifile into lowercase words at any character of `delim`, counts
 * them with add_word() and writes "word count" lines to ofile.  Returns 0
 * on success and 1 on a usage error or when a file cannot be opened.
 */
int main (int argc, char *argv[]) {
    words *mywords = NULL;      /* head of linked list containing words */
    char *delim = ". ,:;\t\n";
    FILE *myFile;
    FILE *myOutput;

    /* validate BEFORE touching argv[1]/argv[2]: with too few arguments
       argv[2] lies beyond the end of the argument vector */
    if (argc != 3) {
        fprintf (stderr, "error: insufficient input. usage: %s ifile ofile\n",
                 argc > 0 ? argv[0] : "llwordcount");
        return 1;
    }
    char *filename = argv[1];
    char *outputfile = argv[2];

    myFile = fopen (filename, "r");
    if (myFile == 0) {
        printf ("file not opened\n");
        return 1;
    }
    printf ("file opened \n");

    /* read character by character; k is the length of the word being built */
    int ch = 0, k = 0;
    char thisword[100];
    while ((ch = fgetc (myFile)) != EOF) {
        if (strchr (delim, ch)) {
            /* k > 0: at least one letter was collected, so a leading
               delimiter or a run of delimiters never adds an empty word */
            if (k > 0) {
                thisword[k] = '\0';
                printf ("\nadd_word (mywords, %s)\n", thisword);
                /* add_word receives the ADDRESS of the head so that it can
                   set it for an empty list; its result only signals success */
                if (add_word (&mywords, thisword))
                    printf (" added: %s\n", thisword);
                else
                    fprintf (stderr, "error: add_word failed.\n");
                k = 0;
            }
        }
        else if (k < (int) sizeof thisword - 1) {   /* keep room for '\0' */
            thisword[k++] = (char) tolower (ch);    /* make ch lowercase */
        }
    }
    if (k > 0) {                /* handle non-POSIX line-end */
        thisword[k] = '\0';
        printf ("\nadd_word (mywords, %s) (last)\n", thisword);
        if (add_word (&mywords, thisword))
            printf (" added: %s\n", thisword);
        else
            fprintf (stderr, "error: add_word failed.\n");
    }
    fclose (myFile);

    printf ("printing list\n");
    myOutput = fopen (outputfile, "w+");    /* second parameter is the output file */
    if (myOutput == 0) {
        printf ("output file not opened \n");
        return 1;
    }
    printf ("output file opened \n");

    /* traverse the list and write each word with its count */
    words *currword = mywords;
    while (currword != NULL) {
        fprintf (myOutput, "%s %d \n", currword->str, currword->count);
        printf ("%s ", currword->str);
        currword = currword->next;
    }
    putchar ('\n');
    fclose (myOutput);

    /* release the list */
    while (mywords != NULL) {
        words *following = mywords->next;
        free (mywords->str);
        free (mywords);
        mywords = following;
    }
    return 0;
}
Input File
$ cat ../dat/captnjack.txt
This is a tale
Of Captain Jack Sparrow
A Pirate So Brave
On the Seven Seas.
Test Use
$ ./bin/llwordcount ../dat/captnjack.txt dat/llout.txt
Output File
$ cat dat/llout.txt
this 1
is 1
a 2
tale 1
of 1
captain 1
jack 1
sparrow 1
pirate 1
so 1
brave 1
on 1
the 1
seven 1
seas 1
note: for printing/output, you simply want while (currword != NULL) to traverse the list.
Now with that said, you should really consider using line-oriented input (fgets or getline) and parsing each line of data into words rather than reading character-by-character and looking for delimiters. It is much easier and less error prone to read/parse a line-at-a-time. Since line-oriented input is buffered, it is also a much faster read. You can read a character at a time, it is just slower and there are a lot more pitfalls along the way.
Digest the changes (commented above with /* ... */) and let me know if you have any questions.

How would I read a text file in C?

I have file.txt with
123456 2,00 beer
234567 2,50 milk
345678 3,30 ice cream
I want to put this info in my dynamic two-dimensional array:
/* Allocate NUM_OF_PROD row pointers, then one MAX_BUFFER-byte row for each. */
char **dataBase = malloc(NUM_OF_PROD * sizeof(char*));
for(i = 0; i < NUM_OF_PROD; i++){
    dataBase[i] = malloc(MAX_BUFFER * sizeof(char));
}
But I don't know how. We have here 3 lines. If it was a C++, I would use getline() but in this situation I can't find a solution.
I usually use the fgets() function to read a file line by line (provided it is a text file).
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define LINELEN 200
#define NAMELEN 40
/* Node of the doubly linked price list: one product line of the input file. */
struct PRICELIST
{
    char item[NAMELEN];             /* product name, may contain spaces */
    float price;
    unsigned int order_no;
    struct PRICELIST *next, *prev;  /* neighbours, NULL at either end */
};
/* Prints one node as "order_no price item"; the price gets two decimals. */
void list_print_node (struct PRICELIST *node)
{
    /* order_no is an unsigned int, so it needs %u rather than %d */
    printf ("%u %4.2f %s\n", node->order_no, node->price, node->item);
}
/* Prints a two-line table header followed by every node from head onwards. */
void list_print (struct PRICELIST *head)
{
    struct PRICELIST *cursor;

    printf ("Order # Price Item\n");
    printf ("------------------------------\n");
    for (cursor = head; cursor != NULL; cursor = cursor->next)
        list_print_node (cursor);
}
/* Frees every node from head to the end of the list; a NULL head is fine.
   Done with a loop instead of recursion so that the stack use does not grow
   with the length of the list. */
void list_delete (struct PRICELIST *head)
{
    while (head)
    {
        struct PRICELIST *following = head->next;
        free (head);
        head = following;
    }
}
/*
 * Reads a price file with lines of the form "<order_no> <price> <item name>"
 * and returns them as a doubly linked list in file order.
 *
 * The price may use ',' or '.' as decimal separator; the item name is the
 * rest of the line and may contain spaces (it is truncated to fit item[]).
 * Blank or malformed lines are skipped.  LF, CRLF and CR line ends are all
 * accepted.
 *
 * Returns the head of the list, or NULL when the file cannot be opened
 * (reported through perror) or contains no valid line.  The caller releases
 * the list with list_delete().
 */
struct PRICELIST *list_read (char *filename)
{
    FILE *file;
    char line[LINELEN];
    struct PRICELIST *pricelist = NULL, *prev = NULL, *node;

    file = fopen (filename, "r");
    if (file == NULL)
    {
        perror (filename);
        return NULL;
    }

    while (fgets (line, sizeof(line), file) != NULL)
    {
        char pricebuf[32];
        char *p, *end, *c;
        unsigned long order_no;
        double price;
        size_t n;

        /* Cut the line at its first CR or LF.  Unlike indexing with
           strlen(line) - 1 this cannot underflow on an empty string. */
        line[strcspn (line, "\r\n")] = '\0';

        /* field 1: the order number */
        p = line;
        while (isspace ((unsigned char) *p)) p++;
        if (!isdigit ((unsigned char) *p))
            continue;                   /* blank or malformed line */
        order_no = strtoul (p, &end, 10);
        p = end;
        while (isspace ((unsigned char) *p)) p++;

        /* field 2: the price token, i.e. everything up to the next blank.
           It is copied so that a decimal comma can be turned into the '.'
           that strtod expects; scanning "2,50" directly stops at the comma. */
        n = strcspn (p, " \t");
        if (n == 0 || n >= sizeof(pricebuf))
            continue;
        memcpy (pricebuf, p, n);
        pricebuf[n] = '\0';
        for (c = pricebuf; *c != '\0'; c++)
            if (*c == ',')
                *c = '.';
        price = strtod (pricebuf, &end);
        if (end == pricebuf || *end != '\0')
            continue;
        p += n;
        while (isspace ((unsigned char) *p)) p++;

        /* field 3: the rest of the line is the item name */
        node = malloc (sizeof *node);
        if (node == NULL)
            break;                      /* out of memory: keep what we have */
        node->order_no = (unsigned int) order_no;
        node->price = (float) price;
        /* snprintf always terminates the copy, strncpy would not */
        snprintf (node->item, sizeof(node->item), "%s", p);

        /* append the node to the end of the list */
        node->next = NULL;
        node->prev = prev;
        if (prev)
            prev->next = node;
        else
            pricelist = node;           /* first node becomes the head */
        prev = node;
    }
    /* we are done with the file, close it */
    fclose (file);
    return pricelist;
}
/* let's test it */
/* Test driver: loads the price list named by argv[1], prints it and frees
   it.  Always returns 0, also when the argument is missing or the list is
   empty. */
int main (int argc, char *argv[])
{
    struct PRICELIST *pricelist;

    if (argc < 2)
    {
        printf ("Usage: %s filename\n", argv[0]);
        return 0;
    }

    pricelist = list_read (argv[1]);
    if (!pricelist)
        return 0;

    /* print the list */
    printf ("This is the price list (filename '%s'):\n\n", argv[1]);
    list_print (pricelist);
    /* delete the list */
    list_delete (pricelist);
    return 0;
}
In the comments you mentioned you were only concerned about actually reading a file.
Here's how you'd go about reading a file (currently untested, binary mode):
#include <stdio.h>
#include <stdlib.h>
/* Example: read a whole file into one heap buffer (binary mode).
   Returns 0 when the complete file was read and 1 on any failure. */
int main(void)
{
    FILE *file = fopen("path/to/your/file/yourfile.txt", "rb");
    if(!file) return 1; //something went wrong!

    /* fseek only reports success or failure; the size comes from ftell
       once the position is at the end of the file */
    if(fseek(file, 0, SEEK_END) != 0) { fclose(file); return 1; }
    long size = ftell(file);
    /* go back to the start, otherwise fread would begin at end-of-file */
    if(size < 0 || fseek(file, 0, SEEK_SET) != 0) { fclose(file); return 1; }

    char *buf = malloc((size_t)size + 1); // +1 so the data can be NUL-terminated
    if(!buf) { fclose(file); return 1; }

    /* pass buf itself: &buf is the address of the pointer variable */
    size_t got = fread(buf, 1, (size_t)size, file);
    buf[got] = '\0';
    fclose(file);

    free(buf); //because this is just an example
    return got == (size_t)size ? 0 : 1;
}
For more info on reading a file, just do a quick google search and you'll find almost everything you're looking for.
You can implement your own version of getline using fgetc and realloc.
#include <stdio.h>
#include <stdlib.h>
/*
 * Reads the next line from `file` into a freshly allocated, NUL-terminated
 * buffer without the '\n'.  The caller must free() the result.
 *
 * Returns NULL at end of file when nothing was read, or when memory runs
 * out.  A final line that lacks a trailing newline is still returned.
 */
char *getline(FILE *file)
{
    size_t size = 16;   // Size of memory allocated for line
    size_t len = 0;     // Characters stored so far
    char *line = malloc(size);

    // Return NULL if memory allocation fails
    if (line == NULL)
        return NULL;

    for(;;) {
        int c = fgetc(file);

        // End of file with nothing read: there is no line to return
        if (c == EOF && len == 0) {
            free(line);
            return NULL;
        }
        // End of the line: terminate right after the last stored character.
        // The growth step below keeps len + 1 < size, so index len is valid.
        if (c == EOF || c == '\n') {
            line[len] = '\0';
            return line;
        }
        line[len++] = (char)c;

        // If the string plus NUL terminator no longer fits, double the size
        if (len + 1 >= size) {
            char *grown = realloc(line, size * 2);
            // On failure the old block is still allocated: release it
            if (grown == NULL) {
                free(line);
                return NULL;
            }
            line = grown;
            size *= 2;
        }
    }
}
There are also many free/open source implementations of the same function that can be found online. For instance this GPL 2 one. If you are on a POSIX system (e.g. OS X or Linux) there is already a version of getline found in stdio.h.

Read tab delimited file to Structure in C

I have a file with tab delimited data. I want to read the every line into a Structure. I have a code to read the data to char buffer. But I want to load the data into a Structure.
This is My sample data.
empname1\t001\t35\tcity1
empname2\t002\t35\tcity2
My Structure definition .
/* One record of the tab-delimited file: name, id, age and address. */
struct employee
{
    char *empname, *empid;
    int age;
    char *addr;
};
My sample program to read data to a char array buffer
char buffer[BUF_SIZE]; /* Character buffer */
input_fd = open (fSource, O_RDONLY);
if (input_fd == -1) {
perror ("open");
return 2;
}
while((ret_in = read (input_fd, &buffer, BUF_SIZE)) > 0){
// Do Some Process
}
Here I want to load the content to a structure variable instead of the character buffer. How I can achieve that?
Well, a possible solution could be
Read a complete line from the file using fgets().
tokenize the input buffer based on the required delimiter [tab in your case] using strtok().
allocate memory (malloc()/ realloc()) to a pointer variable of your structure.
copy the tokenized inputs into the member variables.
Note:
1. fgets() reads and stores the trailing \n.
2. Please check carefully how to use strtok(). The input string should be mutable.
3. Allocate memory to pointers before using them. IMO, use statically allocated array as struct employee member variables.
You can use the fscanf function. Open a file as a stream then use the fscanf to get a input from the file.
int fscanf(FILE *stream, const char *format, ...);
FILE *fp=fopen(fsource,"r+");
struct employee detail;
fscanf(fp,"%s %s %d %s",detail.empname,detail.empid,&detail.age,detail.addr);
Make sure memory is allocated for the pointer members before reading into them.
Or else you can use the strtok function. That time you have to use the sscanf function.
You can use fscanf to read each line from file, strtok to tokenize the line read.
Since your structure members are pointers, allocate memory appropriately.
The following minimal code does exactly what you want.
#define SIZE 50
FILE *fp = NULL;
int i = 0;
struct employee var = {NULL, NULL, 0, NULL};
char line[SIZE] = {0}, *ptr = NULL;
/* 1. Open file for Reading */
if (NULL == (fp = fopen("file.txt","r")))
{
perror("Error while opening the file.\n");
exit(EXIT_FAILURE);
}
/* 2. Allocate Memory */
var.empname = malloc(SIZE);
var.empid = malloc(SIZE);
var.addr = malloc(SIZE);
/* 3. Read each line from the file */
while (EOF != fscanf(fp, "%s", line))
{
/* 4. Tokenise the read line, using "\" delimiter*/
ptr = strtok(line, "\\");
var.empname = ptr;
while (NULL != (ptr = strtok(NULL, "\\")))
{
i++;
/* 5. Store the tokens as per structure members , where (i==0) is first member and so on.. */
if(i == 1)
var.empid = ptr;
else if(i == 2)
var.age = atoi(ptr);
else if (i == 3)
var.addr = ptr;
}
i = 0; /* Reset value of i */
printf("After Reading: Name:[%s] Id:[%s] Age:[%d] Addr:[%s]\n", var.empname, var.empid, var.age, var.addr);
}
Working Demo: http://ideone.com/Kp9mzN
Few things to Note here:
This is guaranteed to work, as long as your structure definition (and order of members) remains the same (see manipulation of value i).
strtok(line, "\\");, Second argument is just escaping (first \) the actual \ character.
Clarification from the OP:
In your structure definition, third member is an int, however you're trying to read t35 into it (which is a string).
So var.age = atoi(ptr); will give you 0,
You could change the structure definition, making third member as char * and allocating memory like other members.
Or change file contents, making sure an int is present as the third value.
I think this may be what you are looking for
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
/* One record of the tab-delimited file.  The three strings are separate
   heap copies made by readEmploee(). */
struct employee
{
    char *empname, *empid;
    int age;
    char *addr;
};
/*
 * Parses one tab-separated record "name<TAB>id<TAB>age<TAB>addr" from `line`
 * (which is modified by the tokenizer) into *employee.  The three strings
 * are heap copies that the caller must free().
 *
 * Returns 1 on success.  Returns 0 when an argument is NULL, a field is
 * missing, the age is not a plain decimal number or memory runs out; in
 * that case *employee is left untouched and nothing stays allocated.
 */
int readEmploee(char *line, struct employee *employee)
{
    char *fields[4];
    char *saveptr;
    char *endptr;
    char *name, *id, *addr;
    long age;
    size_t i;

    if ((employee == NULL) || (line == NULL))
        return 0;

    /* split first and copy afterwards, so that a short line cannot leave
       already duplicated strings behind */
    for (i = 0 ; i < 4 ; i++)
    {
        fields[i] = strtok_r((i == 0) ? line : NULL, "\t", &saveptr);
        if (fields[i] == NULL)
            return 0;
    }

    age = strtol(fields[2], &endptr, 10);
    if ((endptr == fields[2]) || (*endptr != '\0'))
        return 0;

    name = strdup(fields[0]);
    id = strdup(fields[1]);
    addr = strdup(fields[3]);
    if ((name == NULL) || (id == NULL) || (addr == NULL))
    {
        free(name);
        free(id);
        free(addr);
        return 0;
    }

    employee->empname = name;
    employee->empid = id;
    employee->age = (int) age;
    employee->addr = addr;
    return 1;
}
/*
 * Read one line from the file descriptor fd, one byte at a time.
 *
 * Returns a malloc()ed, NUL-terminated string without the trailing '\n';
 * the caller must free() it. An empty line yields an empty string.
 * A final line that is not terminated by '\n' is still returned.
 *
 * Returns NULL when end of file is reached with no pending data, when
 * read() fails, or when memory cannot be allocated.
 */
char *mygetline(int fd)
{
    size_t capacity = 128;
    size_t length = 0;
    char *line;

    line = malloc(capacity);
    if (line == NULL)
        return NULL;

    for (;;)
    {
        char character;
        ssize_t got = read(fd, &character, 1);

        if (got != 1)
        {
            /* End of file with buffered bytes: hand back the last,
             * unterminated line instead of dropping it. */
            if ((got == 0) && (length > 0))
                break;
            free(line);
            return NULL;
        }
        if (character == '\n')
            break;

        /* Grow before storing so there is always room for this byte
         * plus the terminating NUL. */
        if (length + 1 >= capacity)
        {
            char *temp = realloc(line, capacity * 2);
            if (temp == NULL)
            {
                free(line);
                return NULL;
            }
            line = temp;
            capacity *= 2;
        }
        line[length++] = character;
    }
    line[length] = '\0';
    return line;
}
struct employee *readFile(const char *const fSource, size_t *count)
{
struct employee *employees;
int employeeCount;
int input_fd;
char *line;
if ((count == NULL) || (fSource == NULL))
return NULL;
*count = 0;
employees = NULL;
employeeCount = 0;
input_fd = open (fSource, O_RDONLY);
if (input_fd == -1)
{
perror ("open");
return NULL;
}
while ((line = mygetline(input_fd)) != NULL)
{
struct employee employee;
if (readEmploee(line, &employee) != 0)
{
struct employee *temp;
temp = realloc(employees, (1 + employeeCount) * sizeof(struct employee));
if (temp != NULL)
employees = temp;
employees[employeeCount++] = employee;
}
free(line);
}
*count = employeeCount;
return employees;
}
/*
 * Entry point: load the records from "somesamplefile.txt", write each one
 * to stderr and release all memory. Exits with 1 when readFile() returns
 * NULL, 0 otherwise.
 */
int main(void)
{
    size_t total;
    size_t k;
    struct employee *list;

    list = readFile("somesamplefile.txt", &total);
    if (list == NULL)
        return 1;

    for (k = 0; k < total; ++k)
    {
        struct employee *rec = &list[k];

        fprintf(stderr, "%s, %s, %d, %s\n", rec->empname, rec->empid, rec->age, rec->addr);
        /* free(NULL) is a no-op, so no NULL guards are needed. */
        free(rec->empname);
        free(rec->empid);
        free(rec->addr);
    }
    free(list);
    return 0;
}

Easiest way to read this line of text into a struct?

I have a text file with data in the form:
Lee AUS 2 103 2 62 TRUE
Check AUS 4 48 0 23 FALSE
Mills AUS 8 236 0 69 FALSE
I need to read each line into a struct like the one below; however, I'd like to avoid using fixed-length arrays (the problem with fgets, as far as I can tell):
/* One record of the input file; every member is a pointer. */
struct Data
{
    char *sname;   /* first text field */
    char *country; /* second text field */
    int *a;        /* pointers to the numeric fields; presumably the four */
    int *b;        /* integer columns of a line, in order - confirm */
    int *c;
    int *d;
    char *hsisno;  /* last text field */
};
I am very new to C. Should I use fscanf, or fgets?
fscanf stands for "file scan formatted" and user data is about as unformatted as you can get.
You should never use naked "%s" format strings on data where you don't have absolute control over what can be read.
The best solution is to use fgets to read a line since this allows you to prevent buffer overflow.
Then, once you know the size of your line, that's the maximum size of each string that you will require. Use sscanf to your heart's content to get the actual fields.
One final thing. It's probably a bit wasteful having int* types for the integers, since you know they have a specific maximum size already. I'd use the non-pointer variant, something like:
/* Same record with the integers stored by value instead of through pointers. */
struct Data {
    char *sname;
    char *country;
    int a;
    int b;
    int c;
    int d;
    char *hsisno;
};
By way of example, here's some safe code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Here's all the stuff for a linked list of your nodes.
typedef struct sData {
    char *sname; char *country; char *hsisno;
    int a; int b; int c; int d;
    struct sData *next;
} Data;

Data *first = NULL; Data *last = NULL;

#define MAXSZ 100

// Reads "file.in" line by line into the linked list first..last, prints the
// list and frees it. Returns 0 on success, 1 on any error (file cannot be
// opened, malformed or over-long line, out of memory).
int main (void) {
    char line[MAXSZ], sname[MAXSZ], country[MAXSZ], hsisno[MAXSZ];
    int a, b, c, d;
    int rc = 1;
    size_t len;
    FILE *fIn;
    Data *node;

    // Open the input file.
    fIn = fopen ("file.in", "r");
    if (fIn == NULL) {
        printf ("Cannot open file\n");
        return 1;
    }

    // Process every line.
    while (fgets (line, sizeof(line), fIn) != NULL) {
        // Check line for various problems (too short, too long).
        len = strlen (line);
        if (len == 0) {
            printf ("Line too short\n");
            goto done;
        }
        if (line[len - 1] == '\n') {
            line[len - 1] = '\0';
        } else if (!feof (fIn)) {
            // No newline and not at end of file: the line did not fit.
            // A final line that merely lacks '\n' is accepted as is.
            printf ("Line starting with '%s' is too long\n", line);
            goto done;
        }

        // Scan the individual fields. Plain %s is safe here only because
        // each target buffer is as large as the line buffer itself.
        if (sscanf (line, "%s %s %d %d %d %d %s",
                    sname, country, &a, &b, &c, &d, hsisno) != 7)
        {
            printf ("Line '%s' didn't scan properly\n", line);
            goto done;
        }

        // Allocate a new node to hold data.
        node = malloc (sizeof (*node));
        if (node == NULL) {
            printf ("Ran out of memory\n");
            goto done;
        }
        node->sname = strdup (sname);
        node->country = strdup (country);
        node->hsisno = strdup (hsisno);
        if (node->sname == NULL || node->country == NULL || node->hsisno == NULL) {
            printf ("Ran out of memory\n");
            free (node->sname);
            free (node->country);
            free (node->hsisno);
            free (node);
            goto done;
        }
        node->a = a;
        node->b = b;
        node->c = c;
        node->d = d;
        node->next = NULL;

        // Append to the tail of the list.
        if (first != NULL) {
            last->next = node;
            last = node;
        } else {
            first = node;
            last = node;
        }
    }

    // Output the list for debugging.
    for (node = first; node != NULL; node = node->next) {
        printf ("'%s' '%s' %d %d %d %d '%s'\n",
                node->sname, node->country, node->a, node->b,
                node->c, node->d, node->hsisno);
    }
    rc = 0;

done:
    // Single exit path: close the file and release the whole list.
    fclose (fIn);
    while (first != NULL) {
        node = first->next;
        free (first->sname);
        free (first->country);
        free (first->hsisno);
        free (first);
        first = node;
    }
    last = NULL;
    return rc;
}
which reads in your file and stores it in a linked list. It outputs:
'Lee' 'AUS' 2 103 2 62 'TRUE'
'Check' 'AUS' 4 48 0 23 'FALSE'
'Mills' 'AUS' 8 236 0 69 'FALSE'
at the end, as expected.
I've done a whole series of answers on the pitfalls of using *scanf functions on non-controlled data (enter user:14860 fgets into the search box above), some of which (here, here and here, for example) include a perennial favourite function of mine, getLine, for safer user input.

Resources