Related
I am trying to create a program that will read line by line from stdin, search that line for the start and end of a given word and output all the matching words. Here is the code:
int main()
{
char buffer[100];
char **words = NULL;
int word_count = 0;
while (fgets(buffer, sizeof(buffer), stdin) != NULL) {
int length = strlen(buffer);
if (buffer[length - 1] == '\n') {
word_count = count_words(buffer, FIRSTCHAR);
if (word_count > 0) {
words = get_words(buffer, FIRSTCHAR, LASTCHAR);
for (int i = 0; i < word_count; ++i) {
printf("%s\n", words[i]);
free(words[i]);
}
free(words);
}
}
}
return 0;
}
I got the basic functionality working, but I am relying on fgets() with a fixed buffer size.
What I would like is to dynamically allocate a memory buffer with a size based on the length of each line.
I can only see one way of going about solving it, which is to iterate over input with fgetc and increment a counter until end of line and use that counter in place of sizeof(buffer), but I don't know how I would get fgetc to read the correct relevant line.
Is there any smart way of solving this?
but I am relying on fgets() with a fixed buffer size. What I would like is to dynamically allocate a memory buffer with a size based on the length of each line
I did wrote a version of fgets for another SO answer that reads the whole line and returns a
malloc allocated pointer with the contents of the whole line. This is the
code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char *fgets_long(FILE *fp)
{
size_t size = 0, currlen = 0;
char line[1024];
char *ret = NULL, *tmp;
while(fgets(line, sizeof line, fp))
{
int wholeline = 0;
size_t len = strlen(line);
if(line[len - 1] == '\n')
{
line[len-- - 1] = 0;
wholeline = 1;
}
if(currlen + len >= size)
{
// we need more space in the buffer
size += (sizeof line) - (size ? 1 : 0);
tmp = realloc(ret, size);
if(tmp == NULL)
break; // return all we've got so far
ret = tmp;
}
memcpy(ret + currlen, line, len + 1);
currlen += len;
if(wholeline)
break;
}
if(ret)
{
tmp = realloc(ret, currlen + 1);
if(tmp)
ret = tmp;
}
return ret;
}
The trick is to check if the newline was read. If it was read, then you can
return the buffer, otherwise it reallocates the buffer with sizeof line more
bytes and appends it to the buffer. You could use this function if you like.
An alternative would be if you are using a POSIX system and/or are compiling with GNU GCC, then you
can use getline as well.
void foo(FILE *fp)
{
char *line = NULL;
size_t len = 0;
if(getline(&line, &len, fp) < 0)
{
free(line); // man page says even on failure you should free
fprintf(stderr, "could not read whole line\n");
return;
}
printf("The whole line is: '%s'\n", line);
free(line);
return;
}
the function: getline() does just what you want. The syntax:
ssize_t getline(char **lineptr, size_t *n, FILE *stream);
The function is exposed in the stdio.h header file and usually requires something like: #define _POSIX_C_SOURCE 200809L or #define _GNU_SOURCE as the first line in the file that calls getline()
Strongly suggest reading/understanding the MAN page for `getline() for all the grubby details.
I am posed with a situation where my function does exactly what I want except handle higher amounts of input.
I initially thought to process each character one by one but was running into problems doing this. So fscanf not only does what I want it to do but it is essential in reading in only one line. I noticed, I cannot reallocate space for bigger array this way though. I have tried using format specifiers i.e. %*s to include a specific amount of buffer space before hand but this still does not work.
I have noticed also, I would have no way of knowing the size of the string I am reading in.
Here is my attempt and thoughts:
#define LINE_MAX 1000
char* getline(FILE* inputStream)
{
int capacity = LINE_MAX;
char* line = malloc(capacity * sizeof(char));
int ch;
/* if (sizeof(capacity) == sizeof(line)) { // Not a valid comparison? Too late?
capacity *= 2;
line = realloc(line, capacity * sizeof(line));
} */
if (fscanf(stream, "%[^\n]s", line) == 1) {
ch = fgetc(inputStream);
if (ch != '\n' && ch != EOF) {
fscanf(inputStream, "%*[^\n]");
fscanf(inputStream, "%*c");
}
free(line);
return line;
}
free(line);
return NULL;
}
I am new to memory allocation in general but I feel as though I had a good idea of what to do here. Turns out I was wrong.
Here is an example to read a line and store it in a Character array.
#include <stdio.h>
#include <stdlib.h>
int main(void) {
signed char *str;
int c;
int i;
int size = 10;
str = malloc(size*sizeof(char));
for(i=0;(c=getchar()) !='\n' && c != EOF;++i){
if( i == size){
size = 2*size;
str = realloc(str, size*sizeof(char));
if(str == NULL){
printf("Error Unable to Grow String! :(");
exit(-1);
}
}
str[i] = c;
}
if(i == size){
str = realloc(str, (size+1)*sizeof(char));
if(str == NULL){
printf("Error Unable to Grow String! :(");
exit(-1);
}
}
str[i] = '\0';
printf("My String : %s", str);
return 0;
}
The array is resized to twice it's original size if current array can't hold the characters read from input.
I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.
I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.
I need a version of read line that is memory save. I have this "working" solution. But I'm not sure how it behaves with memory. When I enable free(text) it works for a few lines and then I get an error. So now neither text nor result is ever freed although I malloc text. Is that correct ? And why is that so ?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* readFromIn()
{
char* text = malloc(1024);
char* result = fgets(text, 1024, stdin);
if (result[strlen(result) - 1] == 10)
result[strlen(result) - 1] = 0;
//free(text);
return result;
}
I have A LOT of short lines to read with this and I also need stdin to be replaceable with a FILE* handle. There is no need for me to realloc text because I have only short lines.
fgets returns a pointer to the string, so after the fgets line, result will be the same memory address as text. Then when you call free (text); you are returning invalid memory.
You should free the memory in the calling function when you have finished with result
You could also avoid the malloc/free stuff by structuring your code to pass a buffer something like this:
void parent_function ()
{
char *buffer[1024];
while (readFromIn(buffer)) {
// Process the contents of buffer
}
}
char *readFromIn(char *buffer)
{
char *result = fgets(buffer, 1024, stdin);
int len;
// fgets returns NULL on error of end of input,
// in which case buffer contents will be undefined
if (result == NULL) {
return NULL;
}
len = strlen (buffer);
if (len == 0) {
return NULL;
}
if (buffer[len - 1] == '\n') {
buffer[len - 1] = 0;
return buffer;
}
Trying to avoid the malloc/free is probably wise if you are dealing with many small, short lived items so that the memory doesn't get fragmented and it should faster as well.
char *fgets(char *s, int size, FILE *stream) reads in at most one less than size characters from stream and stores them into the buffer pointed to by s. Reading stops after an EOF or a newline. If a newline is read, it is stored into the buffer. A terminating null byte ('\0') is stored after the last character in the buffer.
Return Value: returns s on success, and NULL on error or when end of file occurs while no characters have been read.
So there are 2 critical problems with your code:
You don't check the return value of fgets
You want to deallocate the memory, where this string is stored and return a pointer to this memory. Accessing the memory, where such a pointer (dangling pointer) points to, leads to undefined behaviour.
Your function could look like this:
public char* readFromIn() {
char* text = malloc(1024);
if (fgets(text, 1024, stdin) != NULL) {
int textLen = strlen(text);
if (textLen > 0 && text[textLen - 1] == '\n')
text[textLen - 1] == '\0'; // getting rid of newline character
return text;
}
else {
free(text);
return NULL;
}
}
and then caller of this function should be responsible for deallocating the memory that return value of this function points to.
I know you mentioned that the lines are only short, but none of the solutions provided will work for lines greater than 1024 in length. It is for this reason that I provide a solution which will attempt to read entire lines, and resize the buffer when there's not enough space.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MINIMUM_CAPACITY 16
size_t read_line(char **buffer, size_t *capacity) {
char *buf = *buffer;
size_t cap = *capacity, pos = 0;
if (cap < MINIMUM_CAPACITY) { cap = MINIMUM_CAPACITY; }
for (;;) {
buf = realloc(buf, cap);
if (buf == NULL) { return pos; }
*buffer = buf;
*capacity = cap;
if (fgets(buf + pos, cap - pos, stdin) == NULL) {
break;
}
pos += strcspn(buf + pos, "\n");
if (buf[pos] == '\n') {
break;
}
cap *= 2;
}
return pos;
}
int main(void) {
char *line = NULL;
size_t size = 0;
for (size_t end = read_line(&line, &size); line[end] == '\n'; end = read_line(&line, &size)) {
line[end] = '\0'; // trim '\n' off the end
// process contents of buffer here
}
free(line);
return 0;
}
An ideal solution should be able to operate with a fixed buffer of 1 byte. This requires a more comprehensive understanding of the problem, however. Once achieved, adapting such a solution would achieve the most optimal solution.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *readFromIn(FILE *fp)
{
char text[1024];
size_t len;
if (!fgets(text, sizeof text, fp)) return NULL;
len = strlen(text);
while (len && text[len-1] == '\n') text[--len] = 0;
return strdup(text);
}
Why did no one propose to move the buffer from heap to stack ? This is my solution now:
char input[1024]; // held ready as buffer for fgets
char* readFromIn()
{
char* result = fgets(input, 1024, stdin);
if (result == null)
return "";
if (result[strlen(result) - 1] == '\n')
result[strlen(result) - 1] = 0;
return result;
}