C code to read config file and parse directives - c

I'm trying to read a config file and parse the config directives. So far I have the following code, I need advice on how to improve this or change it. Is this efficient? Thanks!
struct config
{
char host;
char port;
}
void parse_line(char *buf) {
char *line;
if(strstr(buf, "host=") || strstr(buf, "host = ") || strstr(buf, "host= ") || strstr(buf, "host =")) {
line = strstr(buf, "=");
printf("Host: %s", &line[2]);
} else if(strstr(buf, "port=") || strstr(buf, "port = ") || strstr(buf, "port= ") || strstr(buf, "port =")) {
line = strstr(buf, "=");
printf("Port: %s", &line[2]);
}
}
int main(int argc, char *argv[])
{
char *file_name;
FILE *file;
file_name = argv[1];
file = fopen(file_name, "r");
// check if file is NULL, etc..
char buffer[BUFSIZ];
char *line;
int i;
while(fgets(buffer, sizeof(buffer), file) != NULL) {
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
if(buffer[i] == '#') { // if char is a #, stop processing chars on this line
break;
} else if(buffer[i] == ' ') { // if char is whitespace, continue until something is found
continue;
} else {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
}
}
fclose(file);
return 0;
}
I am looking for a way to ignore # if it is a first character on a line, and also lines that are white spaces. I think my code does that, but is that efficient?
EDIT:
Thanks everyone for all the suggestions, I have managed to do this simple code to trim the white spaces, so that I wouldn't need all the strstr() calls.
void trim(char *src)
{
int i, len;
len = strlen(src);
for(i = 0; i < len; i++) {
if(src[i] == ' ') {
continue;
}
if(src[i] == '\n' || src[i] == '#') {
break;
}
printf("%c", src[i]); // prints: host=1.2.3.4
}
}
int main(void)
{
char *str = "host = 1.2.3.4 # this is a comment\n";
trim(str);
return EXIT_SUCCESS;
}
It prints correctly: host=1.2.3.4 but now I need this in a variable to be further parsed. I think I will try to use strcpy.
EDIT 2:
I do not think that strcpy is the right choice. Those chars are printed out in a loop, so every time I use strcpy, the previous char is overwritten. I have tried this, but it does not work because only the host= part is placed into arr. The IP part is not placed into arr.. how can this be fixed..
char arr[sizeof(src)];
for(i = 0; i < len; i++) {
if(src[i] == ' ') {
continue;
}
if(src[i] == '\n' || src[i] == '#') {
break;
}
printf("%c", src[i]); // prints: host=1.2.3.4
arr[i] = src[i];
}
int j;
for(j = 0; j < sizeof(arr); j++) {
printf("%c", arr[j]); //prints: host=
}
EDIT 3:
I found the correct way of placing chars into arr:
int i, count = 0;
for(i = 0; i < len; i++) {
if(src[i] == ' ') {
continue;
}
if(src[i] == '\n' || src[i] == '#') {
break;
}
arr[count] = src[i];
count++;
}

Your implementation is pretty fragile. Parsers really ought to verify syntax and return errors when they see something unexpected. For example, yours should detect missing fields and multiply defined ones.
Fortunately this parsing problem is simple enough for sscanf to handle everything:
skip blank lines,
skip comments
ignore any amount of whitespace
extract the key/value pairs
Here's code:
#include <stdio.h>
#define CONFIG_SIZE (256)
#define HOST_SET (1)
#define PORT_SET (2)
typedef struct config {
unsigned set;
char host[CONFIG_SIZE];
unsigned long port;
} CONFIG;
// Parse the buffer for config info. Return an error code or 0 for no error.
int parse_config(char *buf, CONFIG *config) {
char dummy[CONFIG_SIZE];
if (sscanf(buf, " %s", dummy) == EOF) return 0; // blank line
if (sscanf(buf, " %[#]", dummy) == 1) return 0; // comment
if (sscanf(buf, " host = %s", config->host) == 1) {
if (config->set & HOST_SET) return HOST_SET; // error; host already set
config->set |= HOST_SET;
return 0;
}
if (sscanf(buf, " port = %lu", &config->port) == 1) {
if (config->set & PORT_SET) return PORT_SET; // error; port already set
config->set |= PORT_SET;
return 0;
}
return 3; // syntax error
}
void init_config(CONFIG *config) {
config->set = 0u;
}
void print_config(CONFIG *config) {
printf("[host=%s,port=", config->set & HOST_SET ? config->host : "<unset>");
if (config->set & PORT_SET) printf("%lu]", config->port); else printf("<unset>]");
}
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "Usage: %s CONFIG_FILE\n", argv[0]);
return 1;
}
FILE *f = fopen(argv[1], "r");
char buf[CONFIG_SIZE];
CONFIG config[1];
init_config(config);
int line_number = 0;
while (fgets(buf, sizeof buf, f)) {
++line_number;
int err = parse_config(buf, config);
if (err) fprintf(stderr, "error line %d: %d\n", line_number, err);
}
print_config(config);
return 0;
}
With this input:
# This is a comment
This isn't
# Non-leading comment
host = 123.456.789.10
###
port =42
port= 1
host=fruit.foo.bar
the output is
error line 3: 3
error line 10: 2
error line 11: 1
[host=fruit.foo.bar,port=1]
Note that when the parser discovers a field has already been set, it still uses the latest value in the config. It's easy enough to keep the original instead. I'll let you have that fun.

I think parse_line is a little bit rigid for my taste, I would use strtok
instead. Then you don't have to worry too much about spaces, like you do if you
have a space before the = sign.
Your struct is also wrong, host and port would only hold a character.
Besides port should be an integer. And you need a semicolon ; after the
struct definition.
struct config
{
char host[100];
int port;
};
int parse_line(struct config *config, char *buf)
{
if(config == NULL || buf == NULL)
return 0;
char varname[100];
char value[100];
const char* sep = "=\n"; // get also rid of newlines
char *token;
token = strtok(buf, sep);
strncpy(varname, token, sizeof varname);
varname[sizeof(varname) - 1] = 0; // making sure that varname is C-String
trim(varname);
token = strtok(NULL, sep);
if(token == NULL)
{
// line not in format var=val
return 0;
}
strncpy(value, token, sizeof value);
value[sizeof(varname) - 1] = 0
trim(value);
if(strcmp(varname, "port") == 0)
{
config->port = atoi(value);
return 1;
}
if(strcmp(varname, "host") == 0)
{
strncpy(config->host, value, siezof config->host);
config->host[(sizeof config->host) - 1] = 0;
return 1;
}
// var=val not recognized
return 0;
}
Note that I used a function called trim. This function is not part of the
standard library. Below I posted a possible implementation of such a function.
I like using trim because it gets rid of white spaces. Now you can do this in
main:
struct config config;
// initializing
config.port = 0;
config.host[0] = 0;
int linecnt = 0;
while(fgets(buffer, sizeof(buffer), file) != NULL) {
linecnt++;
trim(buffer);
if(buffer[0] == '#')
continue;
if(!parse_line(&config, buffer))
{
fprintf(stderr, "Error on line %d, ignoring.\n", linecnt);
continue;
}
}
A possible implementation of trim
void rtrim(char *src)
{
size_t i, len;
volatile int isblank = 1;
if(src == NULL) return;
len = strlen(src);
if(len == 0) return;
for(i = len - 1; i > 0; i--)
{
isblank = isspace(src[i]);
if(isblank)
src[i] = 0;
else
break;
}
if(isspace(src[i]))
src[i] = 0;
}
void ltrim(char *src)
{
size_t i, len;
if(src == NULL) return;
i = 0;
len = strlen(src);
if(len == 0) return;
while(src[i] && isspace(src[i]))
i++;
memmove(src, src + i, len - i + 1);
return;
}
void trim(char *src)
{
rtrim(src);
ltrim(src);
}

There are a few ways that you can improve performance:
Calling strstr() in this scenario is inefficient, because the presence of the "host" part of buf can be checked once instead of multiple times every time strstr() is called. Instead, make an if statement that checks if buf begins with "host", then check if buf contains the other elements. The same thing applies to the portion of code checking for the presence of "port".
In the loop in main, instead of doing this:
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
if(buffer[i] == '#') { // if char is a #, stop processing chars on this line
break;
} else if(buffer[i] == ' ') { // if char is whitespace, continue until something is found
continue;
} else {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
do this:
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
char temp = buffer[i];
if(temp == '#') { // if char is a #, stop processing chars on this line
break;
} else if (temp != ' ') {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
Checking to see if something is not equal to another is likely to be just as fast as checking if they are equal (at least on Intel, the je (jump equal) and jne (jump not equal) instructions exhibit the same latency of 1 cycle each), so the statement with the continue in it is not necessary. The temp variable is so that buffer[i] does not need to be calculated in the second if again in case the first if is false. Also, do what user3121023 stated below (same reason for performance as creating the temp variable).
You can use operating-system-specific functions (such as thos from the library WINAPI/WIN32/WIN64 (synonyms) on windows) instead of C standard library functions. Microsoft has very good documentation about their functions in the MSDN (Microsoft Developer Network) web site.
Use uint_fast8_t (defined in stdint.h, this typedef is set to the fastest integer type greater than or equal to the size in bits specified in the typedef) when performing operations on the host and port (but use chars when storing the variables on the disk, in order to make read i/o operations faster).
This isn't related to performance , but use return EXIT_SUCCESS; in main instead of return 0;, since using EXIT_SUCCESS is more readable and exhibits the same performance.

Honestly, I can't help but wonder if rolling your own parser is so great.
Why not use an existing JSON or YAML parser and test for keys in the parsed data?
This will be easily extendible by allowing for new keys to be added with very little effort and the common format of the configuration file makes it very easy for developers to edit.
If you are going to roll out your own parser, than some of the previously mentioned advice makes a lot of sense.
The biggest ones are: don't seek the whole buffer, read the single line that's in front of you and report any errors. Also, advance as you go.
Your parser should work correctly if someone would dump a GigaByte of garbage into the configuration file, so make no assumptions about the data.

Related

Implementing a C function that splits a string on a given character and returns an array of strings after the split (along with length of array)

I'm trying to implement a C function that takes a string and then breaks that string on a certain character and returns back an array of strings after the split along with the size of that array. I'm using a data structure for this since returning a 2D array (the array of strings after the split) and its length is not possible. My code is given below:
struct charArr {
char *arr[10000];
int size;
};
struct charArr *stringSplitter(char *str, char c) {
struct charArr *splitString = (struct charArr *)malloc(sizeof(struct charArr));
if (splitString == NULL) {
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
int i = 0;
int j = 0;
while (str[i] != '\0') {
if (str[i] == c) {
splitString->arr[splitString->size][j] = '\0';
(splitString->size)++;
j = 0;
i++;
while (str[i] == c) { /* this loop is to ignore continuous occurrences of the character c */
i++;
}
} else {
splitString->arr[splitString->size][j] = str[i];
i++;
j++;
}
}
splitString->arr[splitString->size][j] = '\0';
return splitString;
}
int main(int argc, char *argv[]) {
// take input from command line
if (argc == 1) {
//buffer to store lines
size_t buffer_size = 128;
char *buffer = malloc(buffer_size * sizeof(char));
if (buffer == NULL) {
fprintf(stderr, "malloc failed\n");
exit(1);
}
// loop continuously till user exits by ctrl+c
while (1) {
printf("Enter Input> ");
getline(&buffer, &buffer_size, stdin);
char *str = strdup(buffer);
struct charArr *splitString = stringSplitter(str, '&');
for (int i = 0; i<splitString->size; i++) {
printf("%s ", splitString->arr[i]);
}
}
}
return 0;
}
On running the code on a simple input like (the input is continuously taken from the command line):
Enter Input> this & that
I expect the output to be:
this that
But, I'm getting the error:
Segmentation fault (core dumped)
If the input is as shown below (i.e; continuous occurrences of the splitting character):
Enter Input> this &&& that
then also the output must be:
this that
Edit: I'm trying to extend this to split a string on multiple delimiters as well (in one go), so instead of char c in the above function, if char *c is passed which is a string of delimiters (example c = " \t\n" to remove all white spaces from given string), then also it should work as expected and return an array of strings after the split and length of array.
For example, if input is (multiple spaces, tabs and newline):
Enter Input> this that
Then the array returned (which is a part of the returned structure) must be of size 2 and only contain the 2 strings - "this" and "that".
Here's a rewrite of your function with the corrections that you need for proper allocation of each found string using strdup():
You can find my modifications preceded with comments that start 'Previously':
struct charArr* stringSplitter(char *str, char c){
struct charArr* splitString = (struct charArr*)malloc(sizeof(struct charArr));
char buffer[ MAX_BUFF ] ;
if(splitString == NULL){
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
int i=0;
int j=0;
while(str[i] != '\0'){
if(str[i] == c){
//Previously: splitString->arr[splitString->size][j] = '\0';
splitString->arr[splitString->size] = strndup( buffer , j );
(splitString->size)++;
j = 0;
i++;
while(str[i] == c){ /* this loop is to ignore continuous occurrences of the character c */
i++;
}
} else {
// Previously: splitString->arr[splitString->size][j] = str[i];
buffer[j] = str[i];
i++;
j++;
}
}
//Previously: splitString->arr[splitString->size][j] = '\0';
splitString->arr[splitString->size++] = strndup( buffer , j );
return splitString;
}
It's been a long time since I wrote any C so I thought this would be a challenge. Here's a rewrite of the stringSplitter function.
struct charArr* stringSplitter(char *str, char c){
struct charArr* splitString = (struct charArr*)malloc(sizeof(struct charArr));
if(splitString == NULL){
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
char sep[2];
sep[0] = c;
sep[1] = (char) 0;
char* next;
while( (next = strtok( str, sep )) )
{
str = NULL;
splitString->arr[ splitString->size++ ] = next;
}
return splitString;
}
Above, I'm simply using strtok. Take a look at the manpage for strtok() to see it's nuances.

Unknown C String Truncation/Overwrite

I am having an interesting memory problem with a simple string manipulation. The problem itself isn't actually in the reading of the string but right before it when I am trying to call the string.
char *removeInvalid(char *token){
fprintf(stderr," Before: %s \n", token);
char *newToken = malloc(sizeof(100) + 1);
fprintf(stderr," After: %s \n", token);
}
Whenever I run this, the string if truncated right after the char *newToken is malloc'd. So the printout of this results in
Before: Willy Wanka's Chochlate Factory
After: Will Wanka's Chochlate F!
Anyone have any clue what this is? I looked at other examples of malloc, but can't figure out how it is going wrong here.
EDIT: FULL CODE BELOW. Take note I am a college student who just began C, so it isn't perfect by anymeans. But it works up until this error.
Function calls goes as follows. Main->initialReadAVL (This part works perfectly)
Then after commandReadAVL is called which goes commandReadAVL->ReadHelper (Again works fine here.
Then CleanUpString->removeSpaces(works fine)
Then CleanUpString->removeInvalid(THIS IS WHERE IT ERRORS)
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "node.h"
#include "avl.h"
#include "scanner.h"
#include "bst.h"
/* Options */
int avlSwitch = 0;
int bstSwitch = 0;
int insertSwitch = 0;
int deleteSwitch = 0;
int frequencySwitch = 0;
int displaySwitch = 0;
int statisticSwitch = 0;
int ProcessOptions(int argc, char **argv);
char *cleanUpString(char *token);
char *turnToLowerCase(char *token);
char *removeSpaces(char *token);
char *removeInvalid(char *token);
char *readHelper(FILE *in);
void Fatal(char *fmt, ...);
void preOrder(struct node *root);
void initialReadAVL(avl *mainAVL, FILE *in);
void initialReadBST(bst *mainBST, FILE *in);
void commandReadBST(bst *mainBST, FILE *commandList);
void commandReadAVL(avl *mainAVL, FILE *commandList);
int main(int argc, char **argv) {
struct avl *mainAVL;
struct bst *mainBST;
FILE *text;
FILE *commandList;
if(argc != 4){
Fatal("There must be 4 arguments of form 'trees -b corpus commands' \n");
}
int argIndex = ProcessOptions(argc,argv);
text = fopen(argv[2], "r");
commandList = fopen(argv[3], "r");
//Protect against an empty file.
if (text == NULL){
fprintf(stderr,"file %s could not be opened for reading\n", argv[2]);
exit(1);
}
if (commandList == NULL){
fprintf(stderr,"file %s could not be opened for reading\n", argv[3]);
exit(1);
}
if (avlSwitch){
mainAVL = newAVL();
initialReadAVL(mainAVL, text);
preOrder(mainAVL->root);
fprintf(stderr,"\n");
commandReadAVL(mainAVL, commandList);
preOrder(mainAVL->root);
fprintf(stderr,"\n");
}
else if (bstSwitch){
mainBST = newBST();
initialReadBST(mainBST, text);
preOrder(mainBST->root);
commandReadBST(mainBST, commandList);
preOrder(mainBST->root);
}
return 0;
}
void commandReadAVL(avl *mainAVL, FILE *commandList){
char *command;
char *textSnip;
while(!feof(commandList)){
command = readHelper(commandList);
textSnip = readHelper(commandList);
textSnip = cleanUpString(textSnip);
if(command != NULL){
switch (command[0]) {
case 'i':
fprintf(stderr,"%s \n", textSnip);
insertAVL(mainAVL, textSnip);
break;
case 'd':
deleteAVL(mainAVL, textSnip);
break;
case 'f':
break;
case 's':
break;
case 'r':
break;
default:
Fatal("option %s not understood\n",command);
}
}
}
}
void commandReadBST(bst *mainBST, FILE *commandList){
char *command;
char *textSnip;
while(!feof(commandList)){
command = readHelper(commandList);
textSnip = readHelper(commandList);
textSnip = cleanUpString(textSnip);
if(command != NULL){
switch (command[0]) {
case 'i':
insertBST(mainBST, textSnip);
break;
case 'd':
deleteBST(mainBST, textSnip);
break;
case 'f':
break;
case 's':
break;
case 'r':
break;
default:
Fatal("option %s not understood\n",command);
}
}
}
}
char *readHelper(FILE *in){
char *token;
if (stringPending(in)){
token = readString(in);
}
else {
token = readToken(in);
}
return token;
}
void initialReadBST(bst *mainBST, FILE *in){
char *token;
while(!feof(in)){
token = readHelper(in);
token = cleanUpString(token);
if (token != NULL){
insertBST(mainBST, token);
}
}
}
void initialReadAVL(avl *mainAVL, FILE *in){
char *token;
while(!feof(in)){
token = readHelper(in);
token = cleanUpString(token);
if (token != NULL){
insertAVL(mainAVL, token);
}
}
}
//Helper Function to clean up a string using all the prerequisites.
char *cleanUpString(char *token){
char *output = malloc(sizeof(*token)+ 1);
if (token != NULL){
output = removeSpaces(token);
fprintf(stderr,"before : %s \n", output);
output = removeInvalid(output);
fprintf(stderr,"%s \n", output);
output = turnToLowerCase(output);
return output;
}
return NULL;
}
//Helper function to turn the given string into lower case letters
char *turnToLowerCase(char *token){
char *output = malloc(sizeof(*token) + 1);
for (int x = 0; x < strlen(token); x++){
output[x] = tolower(token[x]);
}
return output;
}
//Helper function to remove redundent spaces in a string.
char *removeSpaces(char *token){
char *output;
int x = 0;
int y = 0;
while (x < strlen(token)){
if (token[x]== ' ' && x < strlen(token)){
while(token[x] == ' '){
x++;
}
output[y] = ' ';
y++;
output[y] = token[x];
y++;
x++;
}
else {
output[y] = token[x];
y++;
x++;
}
}
return output;
}
char *removeInvalid(char *token){
fprintf(stderr," Before: %s \n", token);
char *newToken = malloc(sizeof(* token)+ 1);
fprintf(stderr," After: %s \n", token);
int x = 0;
int y = 0;
while (x < strlen(token)){
if (!isalpha(token[x]) && token[x] != ' '){
x++;
}
else {
newToken[y] = token[x];
y++;
x++;
}
}
return newToken;
}
//Processes a system ending error.
void Fatal(char *fmt, ...) {
va_list ap;
fprintf(stderr,"An error occured: ");
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
exit(-1);
}
//Processes the options needed to be executed from the command line
int ProcessOptions(int argc, char **argv) {
int argIndex;
int argUsed;
int separateArg;
argIndex = 1;
while (argIndex < argc && *argv[argIndex] == '-')
{
/* check if stdin, represented by "-" is an argument */
/* if so, the end of options has been reached */
if (argv[argIndex][1] == '\0') return argIndex;
separateArg = 0;
argUsed = 0;
if (argv[argIndex][2] == '\0')
{
separateArg = 1;
}
switch (argv[argIndex][1])
{
case 'b':
bstSwitch = 1;
break;
case 'a':
avlSwitch = 1;
break;
default:
Fatal("option %s not understood\n",argv[argIndex]);
}
if (separateArg && argUsed)
++argIndex;
++argIndex;
}
return argIndex;
}
void preOrder(struct node *root) {
if(root != NULL)
{
fprintf(stderr,"%s ", root->key);
preOrder(root->lChild);
preOrder(root->rChild);
}
}
ReadString()
char *
readString(FILE *fp)
{
int ch,index;
char *buffer;
int size = 512;
/* advance to the double quote */
skipWhiteSpace(fp);
if (feof(fp)) return 0;
ch = fgetc(fp);
if (ch == EOF) return 0;
/* allocate the buffer */
buffer = allocateMsg(size,"readString");
if (ch != '\"')
{
fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
fprintf(stderr,"first character was <%c>\n",ch);
exit(4);
}
/* toss the double quote, skip to the next character */
ch = fgetc(fp);
/* initialize the buffer index */
index = 0;
/* collect characters until the closing double quote */
while (ch != '\"')
{
if (ch == EOF)
{
fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
fprintf(stderr,"no closing double quote\n");
exit(6);
}
if (index > size - 2)
{
++size;
buffer = reallocateMsg(buffer,size,"readString");
}
if (ch == '\\')
{
ch = fgetc(fp);
if (ch == EOF)
{
fprintf(stderr,"SCAN ERROR: attempt to read a string failed\n");
fprintf(stderr,"escaped character missing\n");
exit(6);
}
buffer[index] = convertEscapedChar(ch);
}
else
buffer[index] = ch;
++index;
ch = fgetc(fp);
}
buffer[index] = '\0';
return buffer;
}
INPUT: Commands.txt
i "Willy Wonka's Chochlate Factory"
INPUT testFile.txt
a b c d e f g h i j k l m n o p q r s t u v w x y z
Thanks!
char *turnToLowerCase(char *token){
char *output = malloc(sizeof(*token) + 1);
for (int x = 0; x < strlen(token); x++){
output[x] = tolower(token[x]);
}
return output;
}
This is probably your main issue. You allocate enough space for two characters and then proceed to store lots more than that. You probably wanted:
char *output = malloc(strlen(token) + 1);
Since token is a char*, *token is a char. So sizeof(*token) is sizeof(char) -- definitely not what you want.
You almost certainly have a buffer overrun in some part of the code that you're not showing us. If I were to guess, I'd say you allocate too little storage for token to contain the full string you're writing into it in the first place.
Did you by any chance allocate token using the same erroneous code you have in removeInvalid():
malloc(sizeof(100) + 1);
^^^^^^^^^^^ this doesn't allocate 101 characters, it allocates sizeof(int)+1
char *readHelper(FILE *in){
char * token = malloc(sizeof(char *) + 1);
if (stringPending(in)){
token = readString(in);
}
else {
token = readToken(in);
}
return token;
}
It's hard to make sense of this without being able to see readString or readToken, but this can't possibly be right.
First, you allocate one more byte than is needed for a pointer to one or more characters. What use would such a thing be? If you're not storing a pointer to one or more characters, why use sizeof(char *)? If you are storing a pointer to one or more characters, why add one? It's hard to imagine the reasoning that lead to that line of code.
Then, in the if, you immediately lose the value you got back from malloc because you overwrite token by using it to store something else. If you weren't going to use the value you assigned to token, why did you assign it at all?
Bluntly, a lot of this code simply doesn't make any sense. Without comments, it's hard to understand the reasoning so we could point out what's wrong with it.
Either there was reasoning behind that line of code, in which case it's just completely wrong reasoning. Or worse, the line of code was added with no reasoning in the hopes it would work somehow. Neither method will produce working code.
When you're trying to debug code, first remove anything you added experimentally or that you didn't understand. If you do understand malloc(sizeof(char *) + 1), then please explain what you think it does so that your understanding can be corrected.
Why did you think you needed a buffer that was one byte larger than the size of a pointer to one or more characters?
With the help of David Schwartz and the other posters I was able to find the bug in my problem. When I was allocating memory for my token/output, I wasn't allocating enough space.. Using the erroneous code of
malloc(sizeof(100) + 1);
and
malloc(sizeof(*token) + 1);
both of which produced only a couple of bytes to be allocated. This caused a buffer problem causing random letters and numbers/ truncation to happen. The first resulting in the space equivalent to int + 1 and the second in char + 1. (as I was taking the sizeof token which is just the size of what it originally started as, a char)
To fix this I changed the allocation of my token variable to that of
malloc(strlen(token) + 1);
This allocates a space equivalent to the "string" length of token + 1. Allowing the appropriate space for my problem which would end up with space of <= token.

c read block of lines and store them [duplicate]

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Read files separated by tab in c

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Read one line of a text file in C on Unix — my read_line is broken?

I want to make a function that reads a line of your choice, from a given text file. Moving on to the function as parameters (int fd of the open, and int line_number)
It must do so using the language C and Unix system calls (read and / or open).
It should also read any spaces, and it must not have real limits (ie the line must be able to have a length of your choice).
The function I did is this:
char* read_line(int file, int numero_riga){
char myb[1];
if (numero_riga < 1) {
return NULL;
}
char* myb2 = malloc(sizeof(char)*100);
memset(myb2, 0, sizeof(char));
ssize_t n;
int i = 1;
while (i < numero_riga) {
if((n = read(file, myb, 1)) == -1){
perror("read fail");
exit(EXIT_FAILURE);
}
if (strncmp(myb, "\n", 1) == 0) {
i++;
}else if (n == 0){
return NULL;
}
}
numero_riga++;
int j = 0;
while (i < numero_riga) {
ssize_t n = read(file, myb, 1);
if (strncmp(myb, "\n", 1) == 0) {
i++;
}else if (n == 0){
return myb2;
}else{
myb2[j] = myb[0];
j++;
}
}
return myb2;
}
Until recently, I thought that this would work but it really has some problems.
Using message queues, the string read by the read_line is received as a void string ( "\0" ). I know the message queues are not the problem because trying to pass a normal string did not create the problem.
If possible I would like a fix with explanation of why I should correct it in a certain way. This is because if I do not understand my mistakes I risk repeating them in the future.
EDIT 1. Based upon the answers I decided to add some questions.
How do I end myb2? Can someone give me an example based on my code?
How do I know in advance the amount of characters that make up a line of txt to read?
EDIT 2. I don't know the number of char the line have so I don't know how many char to allocate; that's why I use *100.
Partial Analysis
You've got a memory leak at:
char* myb2 = (char*) malloc((sizeof(char*))*100);
memset(myb2, 0, sizeof(char));
if (numero_riga < 1) {
return NULL;
}
Check numero_riga before you allocate the memory.
The following loop is also dubious at best:
int i = 1;
while (i < numero_riga) {
ssize_t n = read(file, myb, 1);
if (strncmp(myb, "\n", 1) == 0) {
i++;
}else if (n == 0){
return NULL;
}
}
You don't check whether read() actually returned anything quick enough, and when you do check, you leak memory (again) and ignore anything that was read beforehand, and you don't detect errors (n < 0). When you do detect a newline, you simply add 1 to i. At no point do you save the character read in a buffer (such as myb2). All in all, that seem's pretty thoroughly broken…unless…unless you're trying to read the Nth line in the file from scratch, rather than the next line in the file, which is more usual.
What you need to be doing is:
scan N-1 lines, paying attention to EOF
while another byte is available
if it is newline, terminate the string and return it
otherwise, add it to the buffer, allocating space if there isn't room.
Implementation
I think I'd probably use a function get_ch() like this:
static inline int get_ch(int fd)
{
char c;
if (read(fd, &c, 1) == 1)
return (unsigned char)c;
return EOF;
}
Then in the main char *read_nth_line(int fd, int line_no) function you can do:
char *read_nth_line(int fd, int line_no)
{
if (line_no <= 0)
return NULL;
/* Skip preceding lines */
for (int i = 1; i < line_no; i++)
{
int c;
while ((c = get_ch(fd)) != '\n')
{
if (c == EOF)
return NULL;
}
}
/* Capture next line */
size_t max_len = 8;
size_t act_len = 0;
char *buffer = malloc(8);
int c;
while ((c = get_ch(fd)) != EOF && c != '\n')
{
if (act_len + 2 >= max_len)
{
size_t new_len = max_len * 2;
char *new_buf = realloc(buffer, new_len);
if (new_buf == 0)
{
free(buffer);
return NULL;
}
buffer = new_buf;
max_len = new_len;
}
buffer[act_len++] = c;
}
if (c == '\n')
buffer[act_len++] = c;
buffer[act_len] = '\0';
return buffer;
}
Test code added:
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
extern char *read_nth_line(int fd, int line_no);
…code from main answer…
int main(void)
{
char *line;
while ((line = read_nth_line(0, 3)) != NULL)
{
printf("[[%s]]\n", line);
free(line);
}
return 0;
}
This reads every third line from standard input. It seems to work correctly. It would be a good idea to do more exhaustive checking of boundary conditions (short lines, etc) to make sure it doesn't abuse memory. (Testing lines of lengths 1 — newline only — up to 18 characters with valgrind shows it is OK. Random longer tests also seemed to be correct.)

Resources