How do i read an array of strings from a file? - c

I have a file with different words separated by newlines. How can i read and store each word in an array of strings?
1 word, 1 row of the array.
I'm posting this code, but im pretty sure it doesnt work, because i can't understand if i should use fgets or fscanf and how i can write each word in each line of my array.
int file_string_temp_number_rows=200;
int file_string_temp_number_cols=200;
char **file_string_arr = (char**)malloc (file_string_temp_number_rows*sizeof(char));
for ( i = 0 ; i < file_string_temp_number_rows ; i++){
file_string_arr[i] = (char*)malloc(file_string_temp_number_cols*sizeof(char));
}
if ((file_ptr= fopen(filename, "r"))){
if((file_ptr=fopen(filename,"r"))==NULL)
{
printf("errore apertura file");
return 1;
}
else{
while(!feof(file_ptr)){
for(i = 0 ; i < file_string_temp_number_rows ; i++){
for(j = 0 ; j < file_string_temp_number ; j++){
fgets(file_string_arr , 40 , filename);
}
}
}
}
}
}

Addressing your title question: How do i read an array of strings from a file?
There are many approaches to do this. Here is a list of basic steps that could be used.
1) Using fopen(), open file and scan to determine the following:
- Maximum word length.
- Count of words in file.
2) Create container: - Use calloc() to create an array of strings for words.
3) Using fopen() (again), fgets() and strtok() (or variant) to parse content of file into string array.
Note, The sample implementation snippets below use particular functions and techniques, but you should not limit your implementation to only these. There are many paths that would work just as well, so do not be afraid to experiment. For example, either fgets() or fscanf() could be used to solve this problem. The methods highlighted below are just examples of one way to do the task.
Scan example
// provides count of words, and longest word
int longestWord(char *file, int *nWords)
{
FILE *fp=0;
int cnt=0, longest=0, numWords=0;
char c;
fp = fopen(file, "r");
if(fp)
{
// if((strlen(buf) > 0) && (buf[0] != '\t') && (buf[0] != '\n') && (buf[0] != '\0')&& (buf[0] > 0))
while ( (c = fgetc(fp) ) != EOF )
{
if ( isalnum (c) ) cnt++;
else if ( ( ispunct (c) ) || ( isspace(c) ) || (c == '\0' ))
{
(cnt > longest) ? (longest = cnt, cnt=0) : (cnt=0);
numWords++;
}
}
*nWords = numWords;
fclose(fp);
}
else return -1;
return longest;
}
//in main eg:
int longest;
int count;
...
longest = longestWord(".\\file.txt", &count);//longest and count will be
//used to create string arrays
Create string array example
//Create string arrays to contain words using result from original scan of file
char ** Create2DStr(ssize_t numStrings, ssize_t maxStrLen)
{
int i;
char **a = {0};
a = calloc(numStrings, sizeof(char *));
for(i=0;i<numStrings; i++)
{
a[i] = calloc(maxStrLen + 1, 1);
}
return a;
}
// in main(): Create array of words
char **words = Create2DStr(count, longest);//Using values obtained from scan section above
if(words) { //continue
Parse into word strings example
// in main(), after performing scan and array creation:
const char delim[] = {" \n\t"};
char line[260];
char *buf = NULL;
fp = fopen(".\\file.txt", "r");
cnt=0;
while(fgets(line, 260, fp))//keep reading lines until EOF
{
buf = strtok(line, delim);
while(buf)//continue until last word in line is parsed
{
if((strlen(buf) > 0)
{
strcpy(words[cnt], buf);
cnt++; //use as accurate count of words.
}
buf = strtok(NULL, DELIM);
}
}
fclose(fp);

Related

Dynamically allocated unknown length string reading from file (it has to be protected from reading numbers from the file) in C

My problem is such that I need to read string from file. File example:
Example 1 sentence
Example sentence number xd 595 xd 49 lol
but I have to read only the string part, not numbers. I guess I have to use fscanf() with %s for it but let me know what you guys think about it.
The part where my problem begins is how to read the string (it is unknown length) using malloc(), realloc()? I tried it by myself, but I failed (my solution is at bottom of my post).
Then I need to show the result on the screen.
P.S. I have to use malloc()/calloc(), realloc() <-- it has to be dynamically allocated string :) (char *)
Code I've tried:
int wordSize = 2;
char *word = (char *)malloc(wordSize*sizeof(char));
char ch;
FILE* InputWords = NULL;
InputWords = fopen(ListOfWords,"r"); /* variable ListOfWords contains name of the file */
if (InputWords == NULL)
{
printf("Error while opening the file.\n");
return 0;
}
int index = 0;
while((ch = fgetc(InputWords)) != -1)
{
if(ch == ' ')
{
printf("%s\n", word);
wordSize = 2;
index = 0;
free(word);
char* word = (char *)malloc(wordSize*sizeof(char));
}
else
{
wordSize++;
word = (char *)realloc(word, wordSize*sizeof(char));
strcpy(word,ch);
index++;
}
}
fclose(InputWords);
For your code, you have something have to improve:
fgetc return the int type not char. So change char ch to int ch;
As the comment of #pmg use EOF (may be any negative value) instead of -1`
strcpy(word,ch); you try to copy character (ch) to character pointer (word).
Do not cast malloc or realloc function: Do I cast the result of malloc?.
For solving your question, i propose you use the strtok function to split string by space character, then test each word is number or not. If the word is not a number, you can use strcat to concatenate the word to the old sentence.
The complete code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
int is_number(char *str) {
if (strlen(str) == 0)
return -1;
for(int i =0; (i < strlen(str)) && (str[i] != '\n') ; i++) {
if(!isdigit(str[i]))
return -1;
}
return 1;
}
int main()
{
FILE *fp = fopen("input.txt", "r");
char line[256];
if(!fp) return -1;
char **sentence;
int i = 0;
sentence = malloc(sizeof(char *));
if(!sentence) return -1;
while(fgets(line, 256, fp)) {
char * token = strtok(line, " ");
size_t len = 0;
sentence = realloc(sentence, sizeof(char *) * (i+1));
if(!sentence) return -1;
while(token != NULL) {
if (is_number(token) != 1) {
sentence[i] = realloc(sentence[i], len + 2 + strlen(token)); // +2 because 1 for null character and 1 for space character
if (!sentence[i]) {
printf("cannot realloc\n");
return -1;
}
strcat(strcat(sentence[i], " "), token);
len = strlen(sentence[i]);
}
token = strtok(NULL, " ");
}
if(len > 0)
i++;
}
for(int j = 0; j < i; j++) {
printf("line[%d]: %s", j, sentence[j]);
}
for(int j = 0; j < i; j++) {
free(sentence[j]);
}
free(sentence);
fclose(fp);
return 0;
}
The input and output:
$cat input.txt
Example 1 sentence
Example sentence number xd 595 xd 49 lol
./test
line[0]: Example sentence
line[1]: Example sentence number xd xd lol

Implementing a C function that splits a string on a given character and returns an array of strings after the split (along with length of array)

I'm trying to implement a C function that takes a string and then breaks that string on a certain character and returns back an array of strings after the split along with the size of that array. I'm using a data structure for this since returning a 2D array (the array of strings after the split) and its length is not possible. My code is given below:
struct charArr {
char *arr[10000];
int size;
};
struct charArr *stringSplitter(char *str, char c) {
struct charArr *splitString = (struct charArr *)malloc(sizeof(struct charArr));
if (splitString == NULL) {
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
int i = 0;
int j = 0;
while (str[i] != '\0') {
if (str[i] == c) {
splitString->arr[splitString->size][j] = '\0';
(splitString->size)++;
j = 0;
i++;
while (str[i] == c) { /* this loop is to ignore continuous occurrences of the character c */
i++;
}
} else {
splitString->arr[splitString->size][j] = str[i];
i++;
j++;
}
}
splitString->arr[splitString->size][j] = '\0';
return splitString;
}
int main(int argc, char *argv[]) {
// take input from command line
if (argc == 1) {
//buffer to store lines
size_t buffer_size = 128;
char *buffer = malloc(buffer_size * sizeof(char));
if (buffer == NULL) {
fprintf(stderr, "malloc failed\n");
exit(1);
}
// loop continuously till user exits by ctrl+c
while (1) {
printf("Enter Input> ");
getline(&buffer, &buffer_size, stdin);
char *str = strdup(buffer);
struct charArr *splitString = stringSplitter(str, '&');
for (int i = 0; i<splitString->size; i++) {
printf("%s ", splitString->arr[i]);
}
}
}
return 0;
}
On running the code on a simple input like (the input is continuously taken from the command line):
Enter Input> this & that
I expect the output to be:
this that
But, I'm getting the error:
Segmentation fault (core dumped)
If the input is as shown below (i.e; continuous occurrences of the splitting character):
Enter Input> this &&& that
then also the output must be:
this that
Edit: I'm trying to extend this to split a string on multiple delimiters as well (in one go), so instead of char c in the above function, if char *c is passed which is a string of delimiters (example c = " \t\n" to remove all white spaces from given string), then also it should work as expected and return an array of strings after the split and length of array.
For example, if input is (multiple spaces, tabs and newline):
Enter Input> this that
Then the array returned (which is a part of the returned structure) must be of size 2 and only contain the 2 strings - "this" and "that".
Here's a rewrite of your function with the corrections that you need for proper allocation of each found string using strdup():
You can find my modifications preceded with comments that start 'Previously':
struct charArr* stringSplitter(char *str, char c){
struct charArr* splitString = (struct charArr*)malloc(sizeof(struct charArr));
char buffer[ MAX_BUFF ] ;
if(splitString == NULL){
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
int i=0;
int j=0;
while(str[i] != '\0'){
if(str[i] == c){
//Previously: splitString->arr[splitString->size][j] = '\0';
splitString->arr[splitString->size] = strndup( buffer , j );
(splitString->size)++;
j = 0;
i++;
while(str[i] == c){ /* this loop is to ignore continuous occurrences of the character c */
i++;
}
} else {
// Previously: splitString->arr[splitString->size][j] = str[i];
buffer[j] = str[i];
i++;
j++;
}
}
//Previously: splitString->arr[splitString->size][j] = '\0';
splitString->arr[splitString->size++] = strndup( buffer , j );
return splitString;
}
It's been a long time since I wrote any C so I thought this would be a challenge. Here's a rewrite of the stringSplitter function.
struct charArr* stringSplitter(char *str, char c){
struct charArr* splitString = (struct charArr*)malloc(sizeof(struct charArr));
if(splitString == NULL){
fprintf(stderr, "malloc failed\n");
exit(1);
}
splitString->size = 0;
char sep[2];
sep[0] = c;
sep[1] = (char) 0;
char* next;
while( (next = strtok( str, sep )) )
{
str = NULL;
splitString->arr[ splitString->size++ ] = next;
}
return splitString;
}
Above, I'm simply using strtok. Take a look at the manpage for strtok() to see it's nuances.

I am kinda new to C programming. I Need help to push a string from a file(test.txt) into an array without the commas

I am trying to solve a problem on Dynamic memory allocation by reading the input from a file by malloc(),free(),realloc(); i just need help to push the strings into an array from the file, without the commas . My test.txt file are as follows:
a,5,0
a,25,1
a,1,2
r,10,1,3
f,2
int i;
int count;
char line[256];
char *str[20];//to store the strings without commas
char ch[20];
int main (void)
{
FILE *stream;
if ( (stream = fopen ( "test.txt", "r" )) == NULL )
{ printf ("Cannot read the new file\n");
exit (1);
}
while(fgets(line, sizeof line, stream))
{
printf ("%s", line);
int length = strlen(line);
strcpy(ch,line);
for (i=0;i<length;i++)
{
if (ch[i] != ',')
{
printf ("%c", ch[i]);
}
}
}
//i++;
//FREE(x);
//FREE(y);
//FREE(z);
fclose (stream);
the str[] array should only store values like a520. (excluding the commas)
First of all DO NOT use global variables unless it is absolutely requires.
I am assuming you want str as array of pointers and str[0] stores first line, str[1] stores second line and so on.
For this:
int line_pos = 0; //stores line_number
int char_pos = 0; //stores position in str[line_pos]
while(fgets(line, sizeof(line), stream))
{
printf ("%s", line);
int length = strlen(line);
strcpy(ch,line);
str[line_pos] = calloc(length, sizeof(char)); //allocating memory
for (i=0;i<length;i++)
{
if (ch[i] != ',')
{
*(str[line_pos]+char_pos) = ch[i]; //setting value of str[line][pos]
char_pos++;
}
}
char_pos = 0;
line_pos++;
}
printf("%s", str[0]); //print first line without comma
Note that it only works for 20 lines (because you declared *str[20]) and then for 21st or later lines it leads to overflow and can cause variety of disasters. You can include:
if (line_pos >= 20)
break;
as a safety measure.
Note that slighty more memory is allocated for str(memory allocated = memory_required + number of comma). To prevent this you can set ch to text without comma:
for (i=0;i<length;i++)
{
int j = 0; //stores position in ch
if (line[i] != ',')
{
ch[j++] = line[i];
}
Then allocate memory for str[line_pos] like:
str[line_pos] = calloc(strlen(ch0, sizeof(char));

c read block of lines and store them [duplicate]

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Read files separated by tab in c

I am really new to C, and the reading files thing drives me crazy...
I want read a file including name, born place and phone number, etc. All separated by tab
The format might be like this:
Bob Jason Los Angeles 33333333
Alice Wong Washington DC 111-333-222
So I create a struct to record it.
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
I tried many ways to read this file into struct but it failed.
I tired fread:
read_file = fopen("read.txt", "r");
Person temp;
fread(&temp, sizeof(Person), 100, read_file);
printf("%s %s %s \n", temp.name, temp.address, temp.phone);
But char string does not recorded into temp separated by tab, it read the whole file into temp.name and get weird output.
Then I tried fscanf and sscanf, those all not working for separating tab
fscanf(read_file, "%s %s %s", temp.name, temp.address, temp.phone);
Or
fscanf(read_file, "%s\t%s\t%s", temp.name, temp.address, temp.phone);
This separates the string by space, so I get Bob and Jason separately, while indeed, I need to get "Bob Jason" as one char string. And I did separate these format by tab when I created the text file.
Same for sscanf, I tried different ways many times...
Please help...
I suggest:
Use fgets to read the text line by line.
Use strtok to separate the contents of the line by using tab as the delimiter.
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
char* token = strtok(line, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
// Copy token at most the number of characters
// temp.name can hold. Similar logic applies to address
// and phone number.
temp.name[0] = '\0';
strncat(temp.name, token, sizeof(temp.name)-1);
}
token = strtok(NULL, "\t");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.address[0] = '\0';
strncat(temp.address, token, sizeof(temp.address)-1);
}
token = strtok(NULL, "\n");
if ( token == NULL )
{
// Deal with error.
}
else
{
temp.phone[0] = '\0';
strncat(temp.phone, token, sizeof(temp.phone)-1);
}
Update
Using a helper function, the code can be reduced in size. (Thanks #chux)
// The helper function.
void copyToken(char* destination,
char* source,
size_t maxLen;
char const* delimiter)
{
char* token = strtok(source, delimiter);
if ( token != NULL )
{
destination[0] = '\0';
strncat(destination, token, maxLen-1);
}
}
// Use an appropriate number for LINE_SIZE
#define LINE_SIZE 200
char line[LINE_SIZE];
if ( fgets(line, sizeof(line), read_file) == NULL )
{
// Deal with error.
}
Person temp;
copyToken(temp.name, line, sizeof(temp.name), "\t");
copyToken(temp.address, NULL, sizeof(temp.address), "\t");
copyToken(temp.phone, NULL, sizeof(temp.phone), "\n");
This is only for demonstration, there are better ways to initialize variables, but to illustrate your main question i.e. reading a file delimited by tabs, you can write a function something like this:
Assuming a strict field definition, and your struct definition you can get tokens using strtok().
//for a file with constant field definitions
void GetFileContents(char *file, PERSON *person)
{
char line[260];
FILE *fp;
char *buf=0;
char temp[80];
int i = -1;
fp = fopen(file, "r");
while(fgets(line, 260, fp))
{
i++;
buf = strtok(line, "\t\n");
if(buf) strcpy(person[i].name, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].address, buf);
buf = strtok(NULL, "\t\n");
if(buf) strcpy(person[i].phone, buf);
//Note: if you have more fields, add more strtok/strcpy sections
//Note: This method will ONLY work for consistent number of fields.
//If variable number of fields, suggest 2 dimensional string array.
}
fclose(fp);
}
Call it in main() like this:
int main(void)
{
//...
PERSON person[NUM_LINES], *pPerson; //NUM_LINES defined elsewhere
//and there are better ways
//this is just for illustration
pPerson = &person[0];//initialize pointer to person
GetFileContents(filename, pPerson); //call function to populate person.
//...
return 0;
}
First thing,
fread(&temp, sizeof(temp), 100, read_file);
will not work because the fields are not fixed width, so it will always read 20 characters for name 30 for address and so on, which is not always the correct thing to do.
You need to read one line at a time, and then parse the line, you can use any method you like to read a like, a simple one is by using fgets() like this
char line[100];
Person persons[100];
int index;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
persons[i++] = parseLineAndExtractPerson(line);
}
Now we need a function to parse the line and store the data in you Person struct instance
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
Here is a sample implementation of a loop to read at most 100 records
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/path/to/the/file.type", "r");
if (read_file == NULL)
return -1;
index = 0;
while ((index < 100) && (fgets(line, sizeof(line), read_file) != NULL))
{
size_t length;
/* remove the '\n' left by `fgets()'. */
length = strlen(line);
if ((length > 0) && (line[length - 1] == '\n'))
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Here is a complete program that does what I think you need
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct Person{
char name[20];
char address[30];
char phone[20];
} Person;
char *extractToken(const char *const line, char *buffer, size_t bufferLength)
{
char *pointer;
size_t length;
if ((line == NULL) || (buffer == NULL))
return NULL;
pointer = strpbrk(line, "\t");
if (pointer == NULL)
length = strlen(line);
else
length = pointer - line;
if (length >= bufferLength) /* truncate the string if it was too long */
length = bufferLength - 1;
buffer[length] = '\0';
memcpy(buffer, line, length);
return pointer + 1;
}
Person parseLineAndExtractPerson(const char *line)
{
Person person;
person.name[0] = '\0';
person.address[0] = '\0';
person.phone[0] = '\0';
line = extractToken(line, person.name, sizeof(person.name));
line = extractToken(line, person.address, sizeof(person.address));
line = extractToken(line, person.phone, sizeof(person.phone));
return person;
}
int main(void)
{
char line[100];
Person persons[100];
int index;
FILE *read_file;
read_file = fopen("/home/iharob/data.dat", "r");
if (read_file == NULL)
return -1;
index = 0;
while (fgets(line, sizeof(line), read_file) != NULL)
{
size_t length;
length = strlen(line);
if (line[length - 1] == '\n')
line[length - 1] = '\0';
persons[index++] = parseLineAndExtractPerson(line);
}
fclose(read_file);
while (--index >= 0)
printf("%s: %s, %s\n", persons[index].name, persons[index].address, persons[index].phone);
return 0;
}
Parsing strings returned by fgets can be very annoying, especially when input is truncated. In fact, fgets leaves a lot to be desired. Did you get the correct string or was there more? Is there a newline at the end? For that matter, is the end 20 bytes away or 32768 bytes away? It would be nice if you didn't need to count that many bytes twice -- once with fgets and once with strlen, just to remove a newline that you didn't want.
Things like fscanf don't necessarily work as intended in this situation unless you have C99's "scanset" feature available, and then that will automatically add a null terminator, if you have enough room. The return value of any of the scanf family is your friend in determining whether success or failure occurred.
You can avoid the null terminator by using %NNc, where NN is the width, but if there's a \t in those NN bytes, then you need to separate it and move it to the next field, except that means bytes in the next field must be moved to the field after that one, and the 90th field will need its bytes moved to the 91st field... And hopefully you only need to do that once... Obviously that isn't actually a solution either.
Given those reasons, I feel it's easier just to read until you encounter one of the expected delimiters and let you decide the behavior of the function when the size specified is too small for a null terminator, yet large enough to fill your buffer. Anyway, here's the code. I think it's pretty straightforward:
/*
* Read a token.
*
* tok: The buffer used to store the token.
* max: The maximum number of characters to store in the buffer.
* delims: A string containing the individual delimiter bytes.
* fileptr: The file pointer to read the token from.
*
* Return value:
* - max: The buffer is full. In this case, the string _IS NOT_ null terminated.
* This may or may not be a problem: it's your choice.
* - (size_t)-1: An I/O error occurred before the last delimiter
* (just like with `fgets`, use `feof`).
* - any other value: The length of the token as `strlen` would return.
* In this case, the string _IS_ null terminated.
*/
size_t
read_token(char *restrict tok, size_t max, const char *restrict delims,
FILE *restrict fileptr)
{
int c;
size_t n;
for (n = 0; n < max && (c = getchar()) != EOF &&
strchr(delims, c) == NULL; ++n)
*tok++ = c;
if (c == EOF)
return (size_t)-1;
if (n == max)
return max;
*tok = 0;
return n;
}
Usage is pretty straightforward as well:
#include <stdio.h>
#include <stdlib.h>
typedef struct person {
char name[20];
char address[30];
char phone[20];
} Person;
int
main(void)
{
FILE *read_file;
Person temp;
size_t line_num;
size_t len;
int c;
int exit_status = EXIT_SUCCESS;
read_file = fopen("read.txt", "r");
if (read_file == NULL) {
fprintf(stderr, "Error opening read.txt\n");
return 1;
}
for (line_num = 0;; ++line_num) {
/*
* Used for detecting early EOF
* (e.g. the last line contains only a name).
*/
temp.name[0] = temp.phone[0] = 0;
len = read_token(temp.name, sizeof(temp.name), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.address, sizeof(temp.address), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
len = read_token(temp.phone, sizeof(temp.phone), "\t",
read_file);
if (len == (size_t)-1)
break;
if (len == max) {
fprintf(stderr, "Skipping bad line %zu\n", line_num + 1);
while ((c = getchar()) != EOF && c != '\n')
; /* nothing */
continue;
}
// Do something with the input here. Example:
printf("Entry %zu:\n"
"\tName: %.*s\n"
"\tAddress: %.*s\n"
"\tPhone: %.*s\n\n",
line_num + 1,
(int)sizeof(temp.name), temp.name,
(int)sizeof(temp.address), temp.address,
(int)sizeof(temp.phone), temp.phone);
}
if (ferror(read_file)) {
fprintf(stderr, "error reading from file\n");
exit_status = EXIT_FAILURE;
}
else if (feof(read_file) && temp.phone[0] == 0 && temp.name[0] != 0) {
fprintf(stderr, "Unexpected end of file while reading entry %zu\n",
line_num + 1);
exit_status = EXIT_FAILURE;
}
//else feof(read_file) is still true, but we parsed a full entry/record
fclose(read_file);
return exit_status;
}
Notice how the exact same 8 lines of code appear in the read loop to handle the return value of read_token? Because of that, I think there's probably room for another function to call read_token and handle its return value, allowing main to simply call this "read_token handler", but I think the code above gives you the basic idea about how to work with read_token and how it can apply in your situation. You might change the behavior in some way, if you like, but the read_token function above would suit me rather well when working with delimited input like this (things would be a bit more complex when you add quoted fields into the mix, but not much more complex as far as I can tell). You can decide what happens with max being returned. I opted for it being considered an error, but you might think otherwise. You might even add an extra getchar when n == max and consider max being a successful return value and something like (size_t)-2 being the "token too large" error indicator instead.

Resources