How to load multiple "clones" of structure from FILE? C - c

I want to learn how to load multiple structures (many students: name, surname, index, address...) from a text file looking like:
Achilles, 9999
Hector, 9998
Menelaos, 9997
... and so on
Struct can be like:
struct student_t {
char *name;
int index;
}
My attempt (doesn't work; I'm not even sure if fgets+sscanf is a considerable option here):
int numStudents=3; //to simplify... I'd need a function to count num of lines, I imagine
int x, y=1000, err_code=1;
FILE *pfile = fopen("file.txt", "r");
if(pfile==0) {return 2;}
STUDENT* students = malloc(numStudents * sizeof *students);
char buffer[1024];
char *ptr[numStudents];
for (x = 0; x < numStudents; x++){ //loop for each student
students[x].name=malloc(100); //allocation of each *name field
fgets(buffer, 100, pfile); //reads 1 line containing data of 1 student, to buffer
if(x==0) *ptr[x] = strtok(buffer, ",");//cuts buffer into tokens: ptr[x] for *name
else *ptr[x] = strtok(NULL, ","); //cuts next part of buffer
sscanf(ptr[x], "%19s", students[x].name); //loads the token to struct field
*ptr[y] = strtok(NULL, ","); //cuts next part of the buffer
students[y].index = (int)strtol(ptr[y], NULL, 10); //loads int token to struct field
*buffer='\0';//resets buffer to the beginning for the next line from x++ fgets...
y++;//the idea with y=1000 is that I need another pointer to each struct field right?
}
for (x = 0; x < numStudents; x++)
printf("first name: %s, index: %d\n",students[x].name, students[x].index);
return students;
Then printf it to see what was loaded. (to simplify my real structure that has 6 fields). I know a nice method to load 1 student from user input...(How to scanf commas, but with commas not assigned to a structure? C) however to load multiple, I have this idea but I'm not sure if it's too clumsy to work or just terrybly written.
Later I'd try to sort students by name , and perhaps even try to do a realloc buffer that increases it's size along with new students being loaded to buffer... and then to sort what'd been loaded... but I imagine that first I need to load it from the file to buffer and from buffer to fill structure, to be able to sort it then?...
Thanks A LOT for all the help!

C is a little harsh. I use GNU getline below, which may be not portable, which you might end up implementing yourself. I use stdin for input FILE * just for simplicity.
The program reads the students list into the students array. Then I sort the students by comparing indexes, then by name, each time with printing out.
Your code is a bit of a mishmash - try to write a separate function for loading a single student, you don't need char ptr[students] just a single char *ptr for strtok function. strtok is a little mixy, I prefer using just strchr mutliple times. I used memcpy to just copy the name from the string and remember to null delimeter it.
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
struct student_s {
char *name;
int index;
};
static int students_name_cmp(const void *a, const void *b)
{
const struct student_s *s1 = a;
const struct student_s *s2 = b;
return strcmp(s1->name, s2->name);
}
static int students_index_cmp(const void *a, const void *b)
{
const struct student_s *s1 = a;
const struct student_s *s2 = b;
return s1->index - s2->index;
}
int main()
{
struct student_s *students = NULL;
size_t students_cnt = 0;
FILE *fp = stdin;
size_t read;
char *line = NULL;
size_t len = 0;
// for each line
while ((read = getline(&line, &len, fp)) != -1) {
// resize students!
students = realloc(students, (students_cnt + 1) * sizeof(*students));
// handle erros
if (students == NULL) {
fprintf(stderr, "ERROR allocating students!\n");
exit(-1);
}
// find the comma in the line
const const char * const commapos = strchr(line, ',');
if (commapos == NULL) {
fprintf(stderr, "ERROR file is badly formatted!\n");
exit(-1);
}
// student has the neme between the start to the comma adding null delimeter
const size_t namelen = (commapos - line) + 1;
// alloc memory for the name and copy it and null delimeter it
students[students_cnt].name = malloc(namelen * sizeof(char));
// handle errors
if (students[students_cnt].name == NULL) {
fprintf(stderr, "ERROR allocating students name!\n");
exit(-1);
}
memcpy(students[students_cnt].name, line, namelen - 1);
students[students_cnt].name[namelen] = '\0';
// convert the string after the comma to the number
// strtol (sadly) discards whitespaces before it, but in this case it's lucky
// we can start after the comma
errno = 0;
char *endptr;
const long int tmp = strtol(&line[namelen], &endptr, 10);
// handle strtol errors
if (errno) {
fprintf(stderr, "ERROR converting student index into number\n");
exit(-1);
}
// handle out of range values, I use INT_MIN/MAX cause index is int, no better idea, depends on application
if (tmp <= INT_MIN || INT_MAX <= tmp) {
fprintf(stderr, "ERROR index number is out of allowed range\n");
exit(-1);
}
students[students_cnt].index = tmp;
// handle the case when the line consist of any more characters then a string and a number
if (*endptr != '\n' && *endptr != '\0') {
fprintf(stderr, "ERROR there are some rabbish characters after the index!");
exit(-1);
}
// finnally, increment students count
students_cnt++;
}
if (line) {
free(line);
}
// sort by index
qsort(students, students_cnt, sizeof(*students), students_index_cmp);
// print students out sorted by index
printf("Students sorted by index:\n");
for (size_t i = 0; i < students_cnt; ++i) {
printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
}
// now we have students. We can sort them.
qsort(students, students_cnt, sizeof(*students), students_name_cmp);
// print students out sorted by name
printf("Students sorted by name:\n");
for (size_t i = 0; i < students_cnt; ++i) {
printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
}
// free students, lucky them!
for (size_t i = 0; i < students_cnt; ++i) {
free(students[i].name);
}
free(students);
return 0;
}
For the following input on stdin:
Achilles, 9999
Hector, 9998
Menelaos, 9997
the program outputs:
Students sorted by index:
student[0] = 'Menelaos', 9997
student[1] = 'Hector', 9998
student[2] = 'Achilles', 9999
Students sorted by name:
student[0] = 'Achilles', 9999
student[1] = 'Hector', 9998
student[2] = 'Menelaos', 9997
A test version available here on onlinegdb.

Related

Read a CSV file into a dynamic array of structs C

I'm fairly new to C. I'm trying to read a .CSV file, then parse each line, then store the data in a dynamic array of pointers to structs. Unfortunately I've gone wrong somewhere in my implementation which is resulting in an infinite loop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct dataSet {
char ID;
char postcode;
int population;
char contact;
double x;
double y;
}data;
int main(int argc, char* argv[]) {
char line[100] = "";
int count = 0;
int each = 0;
data *allData = NULL;
data *temp = NULL;
FILE *file = fopen("dataset.csv", "r");
if (file == NULL)
{
printf("Error! File null");
return 1;
}
while (fgets(line, sizeof line, file))
{
if(NULL == (temp = realloc(allData, sizeof(*allData) * (count + 1))))
{
fprintf(stderr, "realloc problem\n");
fclose(file);
free(allData);
return 0;
}
allData = temp;
if (6 == scanf(line, "%s, %s, %d, %s, %lf, %lf",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y)) {
count++;
}
else {
printf("Problem with data\n");
}
}
fclose(file);
for (each = 0; each < count; each++)
{
printf("%s, %s, %d, %s, %lf, %lf\n",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y);
}
free(allData);
return 0;
}
Any help or tips would be greatly appreciated.
[s]scanf() is a nasty function. You don't have enough control once it fails. Problem is: there are too many conditions: the input can be incorrect, or the destination is not large enough. Even reading complete lines with fgets(), and parsing them afterwards, will only allow you to skip complete lines; also: the line buffer is mostly fixed sized, and fgets() could read incomplete lines. A way to keep complete control is to read character-based. This might imply a Finite State machine.
A simpler reader (using a zero-state machine) could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct omg {
char o;
int m;
char g[11];
};
struct wtf {
unsigned size;
unsigned used;
struct omg *array;
};
#define INITIAL_SIZE 7
struct wtf read_stuff(char *name)
{
FILE *fp;
unsigned icol,irec,len;
char buff[123];
struct wtf this = {0,0,NULL};
fp = fopen(name, "rb" );
if (!fp) return this;
for (icol=irec=len=0; ; ) {
int ch;
if (this.used >= this.size) {
size_t newsize;
struct omg *tmp;
newsize = this.size? this.size*2: INITIAL_SIZE;
fprintf(stderr, "Realloc(%zu)\n", newsize);
tmp = realloc(this.array, sizeof *this.array * newsize);
this.array = tmp;
this.size = newsize;
}
ch = getc(fp);
switch(ch) {
case '\r' : continue;
/* End of field or record: terminate buffer */
#if 0
case ',' :
#else
case '\t' :
#endif
case '\n' :
buff[len] = 0;
break;
case EOF :
goto done;
/* Normal character: assign to buffer
** You may want to report too long fields here
*/
default:
if (len >= sizeof buff -2) continue;
buff[len++] = ch;
continue;
}
/* When we arrive here, we have a new field. Let's process it ...*/
switch (icol) {
case 0: /* Assign first field here from buff[], (dont forget to check len!) */
this.array[this.used].o = buff[0];
break;
case 1: /* Assign second field from buff[], this may need some additional checks
** You may want to avoid sscanf() here ...
*/
sscanf(buff, "%d", &this.array[this.used].m );
break;
case 2: /* Assign third field from buff[] */
if (len >= sizeof this.array[this.used].g)
len = sizeof this.array[this.used].g -1;
memcpy (this.array[this.used].g, buff, len);
this.array[this.used].g[len] = 0;
break;
default: /* Ignore excess fields
** You may want to report hem.
*/
break;
}
/* Do some bookkeeping */
len = 0;
if(ch == '\n') {
/* You may want to check if icol==2, here */
icol=0; irec++; this.used++;
}
else icol++;
}
done:
fclose(fp);
/* You could do a final realloc() here */
return this;
}
int main(int argc, char **argv)
{
struct wtf result;
unsigned idx;
result = read_stuff(argv[1] );
fprintf(stderr, "Result=%u/%u\n", result.used,result.size);
for (idx=0; idx < result.used; idx++) {
printf("%c %d %s\n"
, result.array[idx].o
, result.array[idx].m
, result.array[idx].g);
if (idx >= 10) break;
}
return 0;
}
You ask for tips...
1 - your struct is wrong if your plan was to use dynamic memory. The char members should be pointers to char, ( char * not char ) as shown below. But to reduce complexity, use char arrays instead of forcing dynamic allocation for struct members: i.e. do not use this:
typedef struct dataSet {
char *ID;
char *postcode;
int population;
char *contact;
double x;
double y;
}data;
Rather use this:
typedef struct dataSet {
char ID[80];
char postcode[11];
int population;
char contact[80];
double x;
double y;
}data;
If the lengths are not right, then make them bigger, but this will reduce calls to calloc() and free().
2 - suggested steps:
Count lines in file. (example here). This will essentially open the file, count the lines and close the file.
Use the count to allocate memory for that number of instances of data (i.e. data *records = malloc(sizeof(*records)*countOfLines); )
Open the file again. If file != NULL, then...
Begin to read file line by line in a loop, such as the fgets(...) loop you have.
In this loop, suggest replacing scanf() with a series of calls to strtok() making the appropriate conversion one-by-one. Its a few more lines of code, but is easier in the long run to see what parsing problems you might run into.
The following pseudo code illustrates...
data *record = malloc(CountOfLines*sizeof(*record));
if(record)
{
int i = 0;
while(fgets(line, sizeof line, file))
{
tok = strtok(line, ",");
if(tok)
{ //convert string
strncpy(record[i].ID, tok, sizeof(record[i].ID) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert string
strncpy(record[i].postcode, tok, sizeof(record[i].postcode) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert int
record[i].population = atoi(tok);
//and so on ...

c read block of lines and store them [duplicate]

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Read files separated by tab in c

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Why do I keep getting a "warning: comparison between pointer and integer error"?

Does anyone know why my program doesn't read from my delimited file? I thought it would print everything from my delimited file after the program ran through my printPropertyListing() method at the bottom but instead it gives me the error message warning "warning: comparison between pointer and integer". It's telling me the error is on the beginning line of my for loop in the main method. Any solutions please?
Here is what my delimited file looks like:
123 Cherry Tree Drive#330#Condo#2#1#275900#Toronto#
14 Leaside Lane#N/A#House#4#2#445500#Brampton#
2478 Waterfront Avenue#N/A#House#5#3#899900#Mississauga#
7 Lucky Lane#1206#Condo#3#2#310000#Toronto#
51 West Street#32#Townhouse#4#2#450000#Brampton#
193 Crystal Road#1519#Condo#1#1#250750#Toronto#
3914 Tangerine Terrace#N/A#House#3#1#427750#Mississauga#
10 Redding Road#N/A#House#4#2#512350#Toronto#
76 Old School Avenue#227#Townhouse#3#2#475000#Toronto#
90 Brookhaven Terrace#N/A#House#4#2#512750#Brampton#
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char buildingType[10];
int numBedrooms;
int numBathrooms;
}Propertylisting;
typedef struct {
Propertylisting propertylisting;
char address[100];
char unitNum [10];
char city [50];
int listPrice;
} Listing;
void parseListings(FILE *in, Listing listing[], int arraySize);
void printPropertyListing(Listing l);
int main()
{
Listing listing[10];
FILE *fp = fopen("PropertyListings.txt", "r");
if (fp == NULL)
{
printf("Could not open file!");
exit(1);
}
else
{
parseListings(fp, listing, 10);
if (listing == 0)
{
printf("No Property data found.");
exit(1);
}
printf("\nNumber of listings in file: %d\n\n", listing);
int i;
for (i = 0; i < listing; i++)
{
printPropertyListing(listing[i]);
printf("\n");
}
fclose(fp);
}
return 0;
}
void parseListings(FILE *in, Listing listing[], int arraySize)
{
// Holds the current index of the Listing array
int n = 0;
// Set value as empty string
char line[256];
// A token pointer that the strtok() function returns
char *token;
char *delimiter = "#";
while (!feof(in)&& n > arraySize)
{
fgets(line, 256, in);
// Read the address
token = strtok(line, delimiter);
strcpy(listing[n].address, token);
// Read the unitNum
token = strtok(NULL, delimiter);
strcpy(listing[n].unitNum, token);
// Read the building typede
token = strtok(NULL, delimiter);
strcpy(listing[n].propertylisting.buildingType, token);
token = strtok(NULL, delimiter);
int numBedrooms = strtol(token, NULL, 10);
listing[n].propertylisting.numBedrooms = numBedrooms;
token = strtok(NULL, delimiter);
int numBathrooms = strtol(token, NULL, 10);
listing[n].propertylisting.numBathrooms = numBathrooms;
token = strtok(NULL, delimiter);
int listPrice = strtol(token, NULL, 10);
listing[n].listPrice = listPrice;
token = strtok(NULL, delimiter);
strcpy(listing[n].city, token);
n++;
}
return n;
}
void printPropertyListing(Listing l)
{
printf("%s %s %s\n%s %d %d %d\n\n",
l.address,
l.unitNum,
l.city,
l.propertylisting.buildingType,
l.propertylisting.numBedrooms,
l.propertylisting.numBathrooms,
l.listPrice);
}
The problem is in your for loop:
for (i = 0; i < listing; i++)
i is of type int and listing is of type Listing [10];
As pointed out by others, your listing variable is an array (which is actually a pointer to the start of a block of memory being used as an array). This means you are attempting to compare a pointer to an int, which does not make sense.
It looks like you are trying to compare to the size of the array, and since your parseListings function already returns the number of listings it has parsed, you can instead do the following:
int numListings = parseListings(fp, listing, 10);
if (numListings == 0) {
printf("No Property data found.");
exit(1);
}
printf("\nNumber of listings in file: %d\n\n", numListings);
int i;
for (i = 0; i < numListings; i++) {
printPropertyListing(listing[i]);
printf("\n");
}
You will note that I changed a few other locations where you were also using the listing variable, but I suspect you had wanted to check against the number of listings.
In addition to this, there is the points which Jens made about other bugs in your program, which would make this the perfect time to learn about the while 1: ... break; paradigm.
while (n < arraySize) {
fgets(line, 256, in);
if(feof(in)) {
break;
}
Credit to other people for pointing out all of these bugs; I have just attempted to formulate all of their responses into a single coherent guide to things you should consider for improving this code specifically and all of your code in general.
Edit: additionally, the return type of parseListing should be changed to int in order to match the fact that you return n;

array of strings access error

When I print char** surname and char** first, I get some strange outputs. I am not sure if I am doing the malloc correctly or if I'm doing something else incorrectly.
The Input -> names1.txt
The outputs
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main ()
{
int size, i;
char **surname, **first, *middle_init, dummy, str[80];
FILE *fp_input = fopen("names1.txt", "r");
fscanf(fp_input, "%d%c", &size, &dummy); // gets size of array from file
/* dynamic memory allocation */
middle_init = (char*)malloc(size * sizeof(char));
surname = (char**)malloc(size * sizeof(char*));
first = (char**)malloc(size * sizeof(char*));
for (i = 0; i < size; i++)
{
surname[i] = (char*)malloc(17 * sizeof(char));
first[i] = (char*)malloc(17 * sizeof(char));
} // for
/* reads from file and assigns value to arrays */
i = 0;
strcpy(middle_init, "");
while (fgets(str, 80, fp_input) != NULL)
{
surname[i] = strtok(str, ", \n");
first[i] = strtok(NULL, ". ");
strcat(middle_init, strtok(NULL, ". "));
i++;
} // while
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s\n", surname[i], first[i]);
return 0;
} // main
A casual look at the code suggests:
You must use strcpy() or a variant on the theme to copy the string found by strtok() into the surname, etc.
The way you've written it, you throw away your allocated memory.
You get the repeated output because you're storing pointers to the string you use to hold the line in the surname and first arrays. That string only holds the last line when you do the printing. This and the previous point are corollaries of the first point.
You only allocate a single character for the middle initials. You then use strcat() to treat them as strings. I recommend treating middle initials as strings, much like the other names. Or, since you aren't required to print them, you might decide to ignore middle initials altogether.
Using 17 instead of enum { NAME_LENGTH = 17 }; or equivalent is not a good idea.
There are undoubtedly other issues too.
I guess you've not reached structures in your course of study yet. If you have covered structures, you should probably use a structure type to represent a complete name, and use a single array of names instead of parallel arrays. This will likely simplify memory management too; you'd use fixed size array elements in the structure, so you'd only have to make one allocation for each name.
The code below produces the output:
Ryan Elizabeth
McIntyre O
Cauble-Chantrenne Kristin
Larson Lois
Thorpe Trinity
Ruiz Pedro
In this code, the err_exit() function is vastly valuable because it makes error reporting into a one-line call, rather than a 4-line paragraph, which means you're more likely to do the error checking. It is a basic use of variable length argument lists, and you may not understand it yet, but it is extremely convenient and powerful. The only functions that could be error checked but aren't are the fclose() and printf(). If you're reading a file, there's little benefit to checking fclose(); if you're writing and fclose() fails, you may have run out of disk space or something like that and it is probably appropriate to report the error. You could add <errno.h> to the list of headers and report on errno and strerror(errno) if you wanted to improve the error reporting more. The code frees the allocated memory; valgrind gives it a clean bill of health.
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void err_exit(const char *fmt, ...);
int main(void)
{
enum { NAME_SIZE = 25 };
const char *file = "names1.txt";
int size, i;
char **surname, **first, str[80];
FILE *fp_input = fopen(file, "r");
if (fp_input == NULL)
err_exit("Failed to open file %s\n", file);
if (fgets(str, sizeof(str), fp_input) == 0)
err_exit("Unexpected EOF on file %s\n", file);
if (sscanf(str, "%d", &size) != 1)
err_exit("Did not find integer in line: %s\n", str);
if (size <= 0 || size > 1000)
err_exit("Integer %d out of range 1..1000\n", size);
if ((surname = (char**)malloc(size * sizeof(char*))) == 0 ||
(first = (char**)malloc(size * sizeof(char*))) == 0)
err_exit("Memory allocation failure\n");
for (i = 0; i < size; i++)
{
if ((surname[i] = (char*)malloc(NAME_SIZE * sizeof(char))) == 0 ||
(first[i] = (char*)malloc(NAME_SIZE * sizeof(char))) == 0)
err_exit("Memory allocation failure\n");
}
for (i = 0; i < size && fgets(str, sizeof(str), fp_input) != NULL; i++)
{
char *tok_s = strtok(str, ",. \n");
char *tok_f = strtok(NULL, ". ");
if (tok_s == 0 || tok_f == 0)
err_exit("Failed to read surname and first name from: %s\n", str);
if (strlen(tok_s) >= NAME_SIZE || strlen(tok_f) >= NAME_SIZE)
err_exit("Name(s) %s and %s are too long (max %d)\n", tok_s, tok_f, NAME_SIZE-1);
strcpy(surname[i], tok_s);
strcpy(first[i], tok_f);
}
if (i != size)
err_exit("Only read %d names\n", i);
fclose(fp_input);
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s\n", surname[i], first[i]);
for (i = 0; i < size; i++)
{
free(surname[i]);
free(first[i]);
}
free(surname);
free(first);
return 0;
}
static void err_exit(const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
exit(1);
}
here:
surname[i] = (char*)malloc(17 * sizeof(char));
first[i] = (char*)malloc(17 * sizeof(char));
..
surname[i] = strtok(str, ", \n");
first[i] = strtok(NULL, ". ");
you allocate memory for surname and first and you don't use that memory because you assign to it the string returned from strtok which you should not do anyway because it points to a static buffer used by the function for parsing, you could use strdup instead:
while (fgets(str, 80, fp_input) != NULL) {
surname[i] = strdup(strtok(str, ", \n"));
first[i] = strdup(strtok(NULL, ". "));
middle_init[i] = strtok(NULL, ". ")[0];
i++;
} // while
/* prints arrays */
for (i = 0; i < size; i++)
printf("%s %s %c\n", surname[i], first[i], middle_init[i]);
strdup will allocate memory and copy the string, this way you avoid hard coding the string length too, you should free that memory when you're done, also note that middile_init is a char array, so I just assign 1 char.

Resources