C read entire line of file [closed] - c

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I am trying to program a tool in C. Part of this program is to use a text file and read it line by line, while storing all lines into an array to have it available for future use.
That's what I have so far:
/*
 * Question code: counts the '\n' characters in file.txt, then tries to read
 * every line into an array of strdup()'d strings and print them.
 * NOTE(review): the closing brace on the last line matches `if (lines>0){`;
 * the brace that would close main() is missing from the post.
 */
int main(){
/* "ab+" opens the file for appending and reading. */
FILE *fp = fopen("file.txt", "ab+");
if (fp == NULL) {
printf("FILE ERROR");
return 1;
}
int lines = 0;
int ch = 0;
/* feof() only becomes true after a read has failed, so the body runs one extra time with ch == EOF. */
while(!feof(fp)){
ch = fgetc(fp);
if(ch == '\n'){
lines++;
}
}
printf("%d\n", lines);
if (lines>0){
int i = 0;
int numProgs = 0;
char* programs[lines];
/* The buffer is sized by the NUMBER of lines, not by the length of a line. */
char line[lines];
FILE *file;
file = fopen("file.txt", "r");
/* fgets() returns at most sizeof(line)-1 characters per call, so a longer line arrives in several pieces and i can run past the end of programs[]. */
while(fgets(line, sizeof(line), file) != NULL){
programs[i] = strdup(line);
i++;
numProgs++;
}
/* sizeof(programs) is the array size in bytes (lines * sizeof(char *)), not the number of entries filled in; numProgs holds that count. */
for (int j= 0; j<sizeof(programs); j++){
printf("%s\n", programs[j]);
}
fclose(file);
fclose(fp);
return 0;
}
My problem is im getting this output:
6 (the number of lines in the file)
Segmentation fault
How can I read a complete line by line , without knowing how long the line is in the beginning. in PHP I can do that very easily, but how can I do that in C?
Thanks for any hint!

fix like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read "file.txt" into an array of heap-allocated lines, echo the lines to
 * stdout and release them again.
 *
 * Pass 1 counts the lines and finds the longest one (newline included);
 * pass 2 re-reads the file with a buffer that is large enough for any line.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(void){
    FILE *fp = fopen("file.txt", "r");
    if (fp == NULL) {
        fprintf(stderr, "FILE ERROR\n");
        return 1;
    }
    int lines = 0;
    int ch = 0;
    int len = 0;        /* length of the line currently being scanned */
    int max_len = 0;    /* length of the longest line seen so far */
    while((ch = fgetc(fp)) != EOF){
        ++len;
        if(ch == '\n'){
            if(max_len < len)
                max_len = len;
            ++lines;
            len = 0;
        }
    }
    if(len){
        /* The last line has no trailing '\n'. It still counts as a line and
         * may be the longest one, so it must take part in max_len; otherwise
         * the buffer below is too short and fgets() splits the line. */
        if(max_len < len)
            max_len = len;
        ++lines;
    }
    fprintf(stderr, "%d lines.\n", lines);
    if (lines > 0){
        int numProgs = 0;
        char *programs[lines];  /* or: char **programs = malloc(lines * sizeof(*programs)); */
        char line[max_len+1];   /* longest line plus the terminating '\0' */
        rewind(fp);
        /* numProgs < lines keeps the writes inside programs[] even if the
         * file changed between the two passes. */
        while(numProgs < lines && fgets(line, sizeof(line), fp)){
            char *copy = strdup(line);
            if(copy == NULL){
                fprintf(stderr, "out of memory\n");
                break;
            }
            programs[numProgs++] = copy;
        }
        for (int j = 0; j < numProgs; j++){
            printf("%s", programs[j]);  /* each line still carries its own '\n' */
            free(programs[j]);
        }
    }
    fclose(fp);
    return 0;
}

If you truly want to read an unknown number of characters from an unknown number of lines and store those lines in an array (or, actually, in an object created from a pointer-to-pointer-to-char), then you have a number of options. POSIX getline is a line-oriented input function (like fgets) which will read a line of text from the given file each time it is called, and will allocate sufficient storage to hold the line regardless of the length. (As a bonus, getline returns the actual number of characters read, eliminating a subsequent call to strlen if the length is needed.)
getline eliminates the need for repeated checks on whether fgets actually read the whole line, or just a partial. Further, if your lines are more than a few characters long, the buffered read provided by getline (and fgets) is quite a bit faster than character oriented input (e.g. fgetc). Don't get me wrong, there is nothing wrong with fgetc, and if your files are small and your lines short, you are not going to notice any difference. However, if you are reading a million lines of 500,000 chars each -- you will notice a significant difference.
As for an array, since you don't know how many lines you will read, you really need a pointer-to-pointer-to-char (e.g. a double-pointer, char **array) so you can allocate some reasonable number of pointers to begin with, allocate and assign the lines to individual pointers until your limit is reached, then realloc array to increase the number of pointers available, and keep on reading/storing lines.
As with any code that dynamically allocates memory, you must (1) preserve a pointer to each block of memory allocated, so (2) the memory can be freed when no longer in use. You should also validate each allocation (and reallocation) to ensure the allocations succeed. When using realloc, always use a temporary pointer so you can validate that realloc succeeds before assigning the new block to the original pointer. If you don't, and realloc fails, you have lost the pointer to your original block of memory that is left untouched, not freed, and you have just created a memory leak.
Lastly, always verify your memory use with a memory error check program such as valgrind on Linux. There are a number of subtle ways to misuse a block of memory.
Putting all that together, you could do something like the following. The code will read all lines from the filename provided as the first argument (or from stdin if no filename is given):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXA = 128 }; /* initial allocation size, MAXA must be >= 1 */
/*
 * Read every line of the file named by argv[1] (or of stdin when no argument
 * is given) into a dynamically grown array of strings, print the lines with
 * their index, then free everything.
 *
 * The pointer array starts with MAXA slots and grows by MAXA slots each time
 * it fills up. Returns 0 on success, 1 if the file cannot be opened or the
 * initial allocation fails.
 */
int main (int argc, char **argv) {
    char *line = NULL;      /* buffer owned by getline, reused for every line */
    char **arr = NULL;      /* array of pointers to the stored lines */
    size_t i, maxa = MAXA, n = 0, ndx = 0;
    ssize_t nchr = 0;
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* allocate MAXA pointers to char -- initially & validate */
    if (!(arr = calloc (maxa, sizeof *arr))) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        if (fp != stdin) fclose (fp);
        return 1;
    }

    while ((nchr = getline (&line, &n, fp)) != -1) {    /* read each line */
        /* remove trailing '\n'; nchr > 0 keeps the index from reaching -1
         * when the line consists of newlines only */
        while (nchr > 0 && line[nchr-1] == '\n') line[--nchr] = 0;

        if (!(arr[ndx] = strdup (line))) {  /* allocate, copy, add to arr */
            fprintf (stderr, "error: virtual memory exhausted.\n");
            break;          /* leave read loop, preserving existing arr */
        }

        if (++ndx == maxa) {    /* if allocation limit reached, realloc arr */
            size_t asz = sizeof *arr;
            void *tmp = realloc (arr, (maxa + MAXA) * asz);
            if (!tmp) {     /* validate realloc succeeded */
                fprintf (stderr, "error: realloc, memory exhausted.\n");
                break;      /* preserving original arr */
            }
            arr = tmp;
            /* zero (optional) only the MAXA new slots: arr is a char **, so
             * arr + maxa already advances by maxa pointers */
            memset (arr + maxa, 0, MAXA * asz);
            maxa += MAXA;   /* update current allocation limit */
        }
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */
    free (line);                    /* free mem allocated by getline */

    for (i = 0; i < ndx; i++)       /* output array */
        printf (" arr[%4zu] : %s\n", i, arr[i]);

    for (i = 0; i < ndx; i++)       /* free allocated memory */
        free (arr[i]);              /* free each line */
    free (arr);                     /* free pointers */

    return 0;
}
Example Use/Output
$ ./bin/getline_realloc_arr < dat/words_554.txt
arr[ 0] : Aam
arr[ 1] : Aard-vark
arr[ 2] : Aard-wolf
arr[ 3] : Aaronic
...
arr[ 549] : Accompaniment
arr[ 550] : Accompanist
arr[ 551] : Accompany
arr[ 552] : Accompletive
arr[ 553] : Accomplice
Look things over and let me know if you have any questions.

Try Online
#include <stdio.h>
#include <stdlib.h>
/*
 * Read one line from `file` into a freshly allocated string.
 *
 * The returned string contains the terminating '\n' when the line had one and
 * always ends with '\0'. Reading also stops after a '\0' byte in the input.
 * The stream is read in a single pass, so it does not need to be seekable
 * (pipes and terminals work).
 *
 * Returns NULL when nothing could be read before end-of-file, or when memory
 * allocation fails. The caller owns the result and must free() it.
 */
char * readLine (FILE * file)
{
    size_t cap = 64;            /* current capacity of out */
    size_t len = 0;             /* characters stored so far */
    char * out = malloc(cap);
    int c;                      /* int so that EOF is distinguishable */

    if (!out) return NULL;

    while ((c = fgetc(file)) != EOF)
    {
        /* keep room for this character plus the final '\0' */
        if (len + 2 > cap)
        {
            char * tmp = realloc(out, cap * 2);
            if (!tmp) { free(out); return NULL; }
            out = tmp;
            cap *= 2;
        }
        out[len++] = (char)c;
        if (c == '\n' || c == '\0') break;
    }

    /* nothing read at all: end of input */
    if (len == 0) { free(out); return NULL; }

    out[len] = '\0';
    return out;
}
/* Echo stdin to stdout one line at a time using readLine(). */
int main (void)
{
    // FILE * file = fopen("test.txt", "r");
    char * line = readLine(stdin);
    while(line)
    {
        /* never pass input data as the format string: a '%' in the line
           would be interpreted as a conversion specifier */
        fputs(line, stdout);    // print current line
        free(line);             // free allocated memory
        line = readLine(stdin); // read the next line
    }
    return 0;
}

Read up on malloc / realloc and friends.
A first approach for reading a single line might be something along the lines of the following (note that this is a toy program, and as such omits error-checking):
size_t line_length = 0;
char *line = NULL;
int ch; /* int, not char: fgetc returns EOF as a value outside the char range */
/* Stop at end-of-file as well, otherwise a last line without '\n' never terminates the loop. */
while ((ch = fgetc(fp)) != '\n' && ch != EOF) {
/* toy code: the result of realloc is deliberately not checked here */
line = realloc(line, line_length+1);
line[line_length++] = (char)ch;
}
// Add null character at end of line
line = realloc(line, line_length+1);
line[line_length] = 0;
The biggest problem with this is that it's slow, and especially slow for long lines. A better approach would be to keep track of the allocated and written size, and exponentially-grow size of the array as necessary, and then trim to the actual required length at the end.
Also, it'd probably be better (and simpler) to use fgets for that approach.
For reading multiple lines, you can nest this approach.

Related

How to return 2d char array (char double pointer) in C?

I am reading a file that contains several lines of strings(max length 50 characters). To store those strings I created a char double-pointer using calloc. The way my code works is as it finds a line in the file it adds one new row (char *) and 50 columns (char) and then stores the value.
My understanding is that I can call this method and get this pointer with values in return. However, I was not getting the values so I check where I am losing it and I found that the memory is not persisting after while loop. I am able to print strings using print 1 statement but print 2 gives me null.
Please let me know what I am doing wrong here.
/*
 * Question code: intended to read the lines of `file` and return them as an
 * array of strings. Exits the process if the file cannot be opened.
 * NOTE(review): it does not achieve that -- see the comments in the loop.
 */
char **read_file(char *file)
{
FILE *fp = fopen(file, "r");
char line[50] = {0};
char **values = NULL;
int index = 0;
if (fp == NULL)
{
perror("Unable to open file!");
exit(1);
}
// read both sequence
while (fgets(line, 50, fp))
{
/* A brand-new, zero-filled array is allocated on every pass; the previous array and the pointers stored in it are dropped. */
values = (char **)calloc(index + 1, sizeof(char *));
values[index] = (char *)calloc(50, sizeof(char));
/* This replaces the pointer to the block allocated just above with the address of the local buffer `line`; no characters are copied. */
values[index] = line;
printf("%s",values[index]); // print 1
index++;
}
fclose(fp);
/* Only the last slot of the newest array was assigned; calloc zeroed the others, so values[0] is NULL once more than one line has been read. */
printf("%s", values[0]); // print 2
return values;
}
line content is overwritten on each loop iteration (by fgets()).
values is overwritten (data loss) and leaks memory on each iteration index > 1.
values[index] is allocated memory on each iteration, which leaks because you overwrite it with the address of line on the following line.
line is a local variable so you cannot return it to caller where it will be out of scope.
caller has no way to tell how many entries values contain.
Here is a working implementation with a few changes. On error it closes the file and frees up memory allocated and return NULL instead of exiting. Moved printf() to caller:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define BUF_LEN 50  /* size of the read buffer: at most BUF_LEN - 1 characters per entry */

/*
 * Read `file` and return its contents as a NULL-terminated array of
 * heap-allocated strings.
 *
 * Each entry holds what one fgets() call returned: a line including its
 * '\n', or a chunk of BUF_LEN - 1 characters when a line is longer than the
 * buffer. The caller frees every entry and then the array itself.
 *
 * Returns NULL (after printing a message with perror) if the file cannot be
 * opened or an allocation fails; nothing is leaked in that case.
 */
char **read_file(char *file) {
    FILE *fp = fopen(file, "r");
    if(!fp) {
        perror("Unable to open file!");
        return NULL;
    }
    char **values = NULL;
    char line[BUF_LEN];
    unsigned index;
    for(index = 0;; index++) {
        /* grow first, so there is always a slot for the terminating NULL */
        char **values2 = realloc(values, (index + 1) * sizeof(char *));
        if(!values2) {
            perror("realloc failed");
            goto err;
        }
        values = values2;
        if(!fgets(line, BUF_LEN, fp)) break;
        /* plain malloc + memcpy instead of strdup(): portable ISO C, and the
           allocation result is checked */
        size_t size = strlen(line) + 1;
        values[index] = malloc(size);
        if(!values[index]) {
            perror("malloc failed");
            goto err;
        }
        memcpy(values[index], line, size);
    }
    fclose(fp);
    values[index] = NULL;
    return values;
err:
    fclose(fp);
    /* entries 0 .. index-1 were fully stored before the failure */
    for(unsigned i = 0; i < index; i++) {
        free(values[i]);
    }
    free(values);
    return NULL;
}
/* Print every entry returned by read_file("test.txt") and free the result. */
int main() {
    char **values = read_file("test.txt");
    if(!values) {
        /* read_file already reported the reason via perror */
        return 1;
    }
    for(unsigned i = 0; values[i]; i++) {
        printf("%s", values[i]);
        free(values[i]);
    }
    free(values);
    return 0;
}
fgets() returns a line ending in '\n' or at most BUF_LEN - 1 characters of data. This means a given values[i] may or may not end with a '\n'. You may want this behavior, or you may want values[i] to be consistent and not contain any trailing '\n' regardless of the input.
strdup() is _POSIX_C_SOURCE >= 200809L and not standard c,
so if you build with --std=c11 the symbol would not be defined.

How to sum up numbers from each lines in file in c?

I need to sum up the numbers from each line in the file like this e.g.:
1 2 3
10 -1 -3
and the result I should write to another file in each line likes this:
6
6
And I have the problem when in each line after the last number in reading file have more spaces, for example, maybe I use the '_' to show this problem:
When my function works:
10_11_12 '\n'
1_2_3 '\n'
and when my function doesn't work:
10_11_12_ _ _ '\n'
1_2_3 '\n'
I think I know where is the problem, but I have no idea how to fix it.
It's my function here:
int num=0;
char s;
/* Each pass reads one integer and then exactly one character following it. */
while(fscanf(file, "%d", &num)==1){
fscanf(file, "%c", &s);
sum+=num;
/* The line total is written only when the single character read right after the number is '\n'. If it is a blank (e.g. trailing spaces), the next "%d" skips the remaining whitespace, newline included, and the sum carries over into the following line. */
if(s=='\n'){
fprintf(res_file, "%d\n", sum);
sum=0;
}
}
The problem is that fscanf is expecting a pointer to a char. Within your function, you are using a regular char, s.
char s;
You can fix your issue by making s a pointer. First, Allocate memory.
char *s = malloc(sizeof(char) + 1);
Now we can properly scan into the variable, s, and then check for the newline character. The only difference here is now we check for the newline by dereferencing s.
if (*s == '\n')
Don't forget to clean up the memory leak with free()!
free(s);
I was able to get the desired output using the code below.
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the sum of the integers on each line of `filename` to stdout, one
 * sum per line.
 *
 * Blanks (spaces, tabs, '\r') between the last number and the end of the
 * line are ignored, so trailing whitespace does not merge two lines. A last
 * line without a trailing '\n' is summed and printed as well.
 *
 * Returns 0 on success, -1 if the file cannot be opened.
 */
int processInputFile(char *filename)
{
    FILE *ifp = fopen(filename, "r");
    int value = 0;
    int sum = 0;

    if (ifp == NULL)
    {
        fprintf(stderr, "Failed to open \"%s \" in processInputFile.\n", filename);
        return -1;
    }

    while (fscanf(ifp, "%d", &value) == 1)
    {
        int c;

        sum += value;

        /* skip blanks after the number to find out what really ends it */
        do {
            c = fgetc(ifp);
        } while (c == ' ' || c == '\t' || c == '\r');

        if (c == '\n' || c == EOF)
        {
            printf("%d\n", sum);
            sum = 0;
        }
        else
        {
            /* start of the next token on the same line: give it back */
            ungetc(c, ifp);
        }
    }

    fclose(ifp);
    return 0;
}
/* Entry point: expects the input file name as the first argument. */
int main(int argc, char **argv)
{
    if (argc >= 2)
    {
        processInputFile(argv[1]);
        return 0;
    }

    /* no file name given */
    printf("Proper syntax: ./a.out <n>\n");
    return -1;
}
Any kind of line-by-line processing in C is easier done by reading the line first, and then processing it. fgets(3) handles end-of-line for you; then you just need to scan what it read. Plus, in the real world, some lines won't scan: either they'll have errors, or your scan won't be general enough. When that happens, it's awfully handy to write the input to standard error, so you can see what you're looking at.
Here's a complete program that does what you want. It assumes lines are less than 80 bytes long and doesn't protect against invalid input, though.
#include <stdio.h>
#include <err.h>
/*
 * Read "sum.dat" line by line and print the sum of the numbers found on
 * each line, labelled with a zero-based line index.
 */
int main( int argc, char *argv[] ) {
    static const char *filename = "sum.dat";
    char line[80];
    FILE *input = fopen(filename, "r");

    if( input == NULL ) {
        err(1, "could not open %s", filename);
    }

    int nlines = 0;
    while( fgets(line, sizeof(line), input) != NULL ) {
        double sum = 0, value;
        int n;
        char *p = line;

        /* %n stores how many characters the conversion consumed, which is
           how far p has to move to reach the next number */
        while( sscanf(p, "%lf%n", &value, &n) > 0 ) {
            sum += value;
            p += n;
        }

        printf( "line %d: sum = %lf\n", nlines, sum );
        nlines++;
    }
    return 0;
}
Reading with a line-oriented input function like fgets() or POSIX getline() ensures that a complete line of input is consumed on each call. (don't skimp on buffer size). strtol was created to convert an unknown number of values per-line into long. You walk-a-pointer down your buffer by utilizing the endptr parameter filled by strtol after a successful conversion to point to the next character after the last digit converted.
This allows a simple method to use a pair of pointers, p your start-pointer and ep your end-pointer to work through an entire line converting values as you go. The basic approach is to call strtol, validate it succeeded, and then set p = ep; to advance to the start of your next conversion. strtol ignores leading whitespace.
Putting it altogether, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
/* (don't skimp on buffer-size) */
/*
 * Print the sum of the integers on each non-empty line of the file named by
 * argv[1] (stdin by default).
 *
 * Values are converted with strtol (base 0, so 0x.. and 0.. prefixes are
 * accepted). Conversion of a line stops with a message on stderr at the
 * first token that is not a number or that does not fit; the sum collected
 * up to that point is still printed.
 */
int main (int argc, char **argv) {

    char buf[MAXC];     /* buffer to hold each line read */
    size_t n = 0;       /* line-counter */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {     /* read each line */
        char *p = buf, *ep = p;         /* pointer and end-pointer */
        int sum = 0;                    /* variable to hold sum */

        if (*buf == '\n')               /* ignore empty lines */
            continue;

        for (;;) {
            /* skip blanks first, so that whitespace between the last number
             * and the newline is not mistaken for a failed conversion */
            while (*p == ' ' || *p == '\t' || *p == '\r')
                p++;
            if (!*p || *p == '\n')
                break;

            errno = 0;
            long tmp = strtol (p, &ep, 0);  /* convert to temp long */
            if (p == ep) {      /* validate digits were converted */
                fputs ("error: no digits extracted.\n", stderr);
                break;
            }
            else if (errno) {   /* validate no under/overflow occurred */
                fputs ("error: underflow/overflow occurred.\n", stderr);
                break;
            }
            else if (tmp < INT_MIN || INT_MAX < tmp) {  /* validate in range */
                fputs ("error: tmp exceeds range of int.\n", stderr);
                break;
            }
            /* signed overflow is undefined, so test before adding */
            if ((tmp > 0 && sum > INT_MAX - tmp) ||
                (tmp < 0 && sum < INT_MIN - tmp)) {
                fputs ("error: sum exceeds range of int.\n", stderr);
                break;
            }
            sum += (int)tmp;    /* add tmp to sum */
            p = ep;             /* set p to end-ptr (one past last digit used) */
        }
        n++;    /* advance line counter */
        printf ("sum line [%2zu] : %d\n", n, sum);  /* output sum */
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}
(note: the if (*buf == '\n') test checks whether the first character in the line is a newline character and simply skips to the next line; there is no need to worry about converting values in an empty line)
Example Use/Output
Using your data in dat/sumlines.txt produces the expected results.
$ ./bin/sumline dat/sumlines.txt
sum line [ 1] : 6
sum line [ 2] : 6
Let me know if you have further questions.

My code crashes at long texts? Reading from file character by character into a dynamic char array in C

I want to read a whole text from txt file, then print it.
It should be with dynamic memory managment and character by character.
My code works good with short texts (max. 40 characters), but crashes when I read more characters.
#include <stdio.h>
#include <stdlib.h>
/*
 * Question code: intended to read input.txt character by character into a
 * dynamically allocated buffer and then print it.
 */
int main()
{
char *s;
int n, i;
/* Allocates room for exactly one char. */
s = (char*) malloc(sizeof(char));
n=0;
/* The result of fopen is not checked before use. */
FILE *f=fopen("input.txt","r");
/* Every character is stored at s[n] but the buffer never grows, so each write after the first lands outside the allocation. */
while (fscanf(f, "%c", &s[n]) != EOF)
n++;
fclose(f);
/* The buffer is released here, before the loop below reads from it. */
free(s);
for (i=0;i<n;i++)
printf("%c",s[i]);
return 0;
}
Please don't cast malloc.
You allocating the wrong number of bytes, sizeof(char) returns you the size of
a single char, so you are allocating one byte only. That's not enough to hold
a string.
You should use malloc like this:
int *a = malloc(100 * sizeof *a);
if(a == NULL)
{
// error handling
// do not continue
}
Using sizeof *a is better than sizeof(int), because it will always return
the correct amount of bytes. sizeof(int) will do that as well, but the problem
here is the human factor, it's easy to make a mistake and write sizeof(*int)
instead. There are thousands of questions here with this problem.
Note that a char is defined to have the size of 1, that's why when you are
allocating memory for strings or char arrays, people usually don't write the
* sizeof *a:
char *s = malloc(100);
// instead of
char *s = malloc(100 * sizeof *s);
would be just fine. But again, this is only the case for char. For other types
you need to use sizeof operator.
You should always check the return value of malloc, because if it returns
NULL, you cannot access that memory.
while (fscanf(f, "%c", &s[n]) != EOF)
n++;
If you for example allocated 100 spaces, you have to check that you haven't
reached the limit. Otherwise you will overflow s:
char *s = malloc(100);
if(s == NULL)
{
fprintf(stderr, "not enough memory\n");
return 1;
}
int n = 0;
/* Test the limit BEFORE reading: && evaluates left to right, so fscanf is
   never asked to store into s[100]. */
while (n < 100 && (fscanf(f, "%c", &s[n]) != EOF))
n++;
In this case you are not using the allocated memory to store a string, so it's
fine that it doesn't have the '\0'-terminating byte. However, if you want to
have a string, you need to write one:
/* Limit first, then read: no character is consumed from the file once the
   99 usable bytes are full, and s[99] stays free for the terminator. */
while (n < 99 && (fscanf(f, "%c", &s[n]) != EOF))
n++;
s[n] = '\0';
Also you are doing this
free(s);
for (i=0;i<n;i++)
printf("%c",s[i]);
You are freeing the memory and then trying to access it. You have to do the
other way round, access then free.
Correct way:
for (i=0;i<n;i++)
printf("%c",s[i]);
free(s);
EDIT
If you want the contents of a whole file in a single string, then you have 2
options:
Calculate the length of the file beforehand and then use allocate the correct
amount of data
Read one fixed size chunk of bytes at a time and resize the memory every time
you read a new chunk.
The first one is easy, the second one is a little bit more complicated because
you have to read the contents, look how much you've read, resize the memory with
realloc, check that the resizing was successful. This is something you could
do later when you have more knowledge of simple memory management.
I'll show you the first one, because it's much easier. The function fseek
allows you to advance your file pointer to the end of the file, with the
function ftell you can get the size of the file and with rewind
rewind the file pointer and set it to the beginning:
#include <stdio.h>
#include <stdlib.h>
/*
 * Load the whole file named by argv[1] into one heap buffer and print it.
 *
 * The size is obtained with fseek/ftell, the buffer is allocated with one
 * extra byte for the '\0' terminator, and the content is read with a single
 * fread that is bounded by the allocated size.
 * Returns 0 on success (and on a usage error), 1 on any I/O or memory error.
 */
int main(int argc, char **argv)
{
    if(argc != 2)
    {
        fprintf(stderr, "usage: %s file\n", argv[0]);
        return 0;
    }

    FILE *fp = fopen(argv[1], "r");
    if(fp == NULL)
    {
        fprintf(stderr, "Could not open %s for reading.\n", argv[1]);
        return 1;
    }

    // calculating the size
    // setting file pointer to the end of the file
    if(fseek(fp, 0L, SEEK_END) < 0)
    {
        fprintf(stderr, "Could not set the file pointer to the end\n");
        fclose(fp);
        return 1;
    }

    // getting the size
    long size = ftell(fp);
    if(size < 0)
    {
        fprintf(stderr, "Could not calculate the size\n");
        fclose(fp);
        return 1;
    }
    printf("file size of %s: %ld\n", argv[1], size);

    // rewinding the file pointer to the beginning of the file
    rewind(fp);

    char *s = malloc((size_t)size + 1); // +1 for the 0-terminating byte
    if(s == NULL)
    {
        fprintf(stderr, "not enough memory\n");
        fclose(fp);
        return 1;
    }

    // fread never stores more than `size` bytes, so the buffer cannot
    // overflow even if the file grew after its size was measured; n is the
    // number of bytes actually read
    size_t n = fread(s, 1, (size_t)size, fp);
    s[n] = '\0'; // writing the 0-terminating byte
    fclose(fp);

    printf("Contents of file %s\n\n", argv[1]);
    fwrite(s, 1, n, stdout);

    free(s);
    return 0;
}
You are only allocating space for one single char (sizeof(char)):
s = (char*) malloc(sizeof(char));
If you want to store more characters you have to allocate more space, for example:
s = (char*) malloc(sizeof(char) * 100);
To read the whole file you best first find out how large the file is, so that you know how much space you need to allocate for s.
Calculate size of the file with the help of fseek.
fseek(fp, 0L, SEEK_END); sz = ftell(fp);
You can then seek back,to the begining of file
fseek(fp, 0L, SEEK_SET);
Allocate that much memory through malloc.
S= (char*)malloc((sizeof(char)×sz)+1)
Now you can use this memory to copy the bytes in a while loop.

Detecting the rows of an array and finding common words of two arrays in C

I have a file called a1.txt which contains the words
amazing
malevolent
permanent
and another one called a2.txt with
Amazing
Bridge
Malevolent
Here is the code that I use to read the files into arrays, thanks to #M Oehm.
NOTE: void b(); is the same as void a() but it reads a2.txt instead.
void a();
void b();
/* Each is a pointer to rows of 50 chars, i.e. one fixed-size slot per word. */
char (*a1)[50];
char (*a2)[50];
/* Word count set by a(); per the note above b() is identical, so after both calls it holds the count of the file read last. */
int n;
/* Question code: loads both word lists and prints strcmp() for pairs of words. */
int main(int argc, char *argv[]) {
a();
printf("\n\n");
b();
int i=0, j=0;
/* strlen(*a1) is the length of the first word in a1, not the number of words stored. */
for (i; i < strlen(*a1); i++)
{
/* j is not reset to 0 here, so after the first pass of the outer loop the inner loop body no longer runs. */
for (j; j <strlen(*a2); j++)
{
printf("\n%d", strcmp(a1[i], a2[j]));
}
}
return 0;
}
/*
 * Read the whitespace-separated words of a1.txt into the global array a1 and
 * print them, one per line. Sets the global n to the number of words.
 * Exits the process if the file cannot be opened or the allocation fails.
 * The file is not closed before returning.
 */
void a(){
FILE *f;
int i;
f = fopen("a1.txt", "r");
if (f == NULL) {
fprintf(stderr, "Can't open file\n");
exit(1);
}
/* first pass: count the words */
n = 0;
while (fscanf(f, "%*s") != EOF) n++; /* star means: scan, but don't store */
/* n + 1 rows: one per word plus one extra row used as an empty terminator below */
a1 = malloc((n + 1) * sizeof(*a1));
if (a1 == NULL) {
fprintf(stderr, "Allocation failed\n");
exit(1);
}
/* second pass: rewind and store the words; %49s leaves room for the '\0' in a 50-char row */
fseek(f, 0, SEEK_SET);
for (i = 0; i < n; i++) {
fscanf(f, "%49s", a1[i]);
}
/* parses as *(a1[n]): sets the first char of the extra row, making it an empty string */
*a1[n] = '\0';
/* process words */
for (i = 0; i < n; i++) {
printf("%s\n",a1[i]);
}}
As you can see the rows of the arrays are dynamic(I used three words as a test, however this should be done for an unknown amount of words hence the usage of calloc). Is it possible to detect the rows of each array and write the common words of each in a new file?
Finding the common words is a simple matter, I assume, of using strstr.
You seem to have some misconceptions about memory allocation:
char *str[50] creates an array of 50 (uninitialised) pointers of char. Perhaps you want char (*str)[50], which is a pointer to an array of 50 chars, to which you can allocate memory.
lSize is the length of the file, i.e. the number of chars. It looks a bit as if you wanted to count the number of words.
I'll present two strategies for reading words into a char array.
Read fixed-size words
This strategy uses a fixed word size of 50, as in your example. It opens the file and reads it in two passes. The first to determine the number of words, the next to read the actual words after allocating enough space.
/*
 * Read the whitespace-separated words of the file named by argv[1] into an
 * array of fixed-size (50-char) slots and print them with their index.
 *
 * The file is scanned twice: once to count the words, once to store them.
 * Exits with status 1 on a usage, I/O or allocation error.
 */
int main(int argc, char *argv[])
{
    FILE *f;
    char (*str)[50];    /* Pointer to words of max length 49 */
    int n;              /* number of words */
    int i;

    if (argc != 2) {
        fprintf(stderr, "Usage: $fifo file_name.ip\n");
        exit(1);
    }

    f = fopen(argv[1], "r");
    if (f == NULL) {
        fprintf(stderr, "Can't open file\n");
        exit(1);
    }

    /* first pass */
    n = 0;
    while (fscanf(f, "%*s") != EOF) n++;    /* star means: scan, but don't store */

    str = malloc((n + 1) * sizeof(*str));
    if (str == NULL) {
        fprintf(stderr, "Allocation failed\n");
        fclose(f);
        exit(1);
    }

    /* second pass */
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Can't rewind file\n");
        free(str);
        fclose(f);
        exit(1);
    }
    for (i = 0; i < n; i++) {
        /* stop early if fewer words can be read than were counted, so that
           no uninitialised slot is printed below */
        if (fscanf(f, "%49s", str[i]) != 1) break;
    }
    n = i;
    fclose(f);

    str[n][0] = '\0';   /* empty string marks the end of the list */

    /* process words */
    for (i = 0; i < n; i++) {
        printf("%4d: '%s'\n", i, str[i]);
    }

    free(str);

    return 0;
}
This approach is reasonably simple, but it has two drawbacks: You will waste memory, because most words won't be 50 characters long. And you have to scan the file twice. Both drawbacks are not serious on modern computers.
Allocate as you go
You can also maintain the words as pointers to pointers to char, char **str. str[i] gives you a word, which is stored as pointer into existing memory of a null-terminated string. The function strtok gives you such strings.
This "existing memory" is the contents of the file as a char buffer. Rohan has shown you how to get it: by getting the file length, allocating and reading.
This method takes only one pass, because it reallocates memory according to its needs. Start with space for, say, 64 words, read them, find out we need more, so reallocate to make 128 words fit, read words 64-127, and so on.
/*
 * Load the file named by argv[1] into one buffer, split it in place into
 * whitespace-separated words with strtok and print the words with their
 * index.
 *
 * str[] holds pointers INTO buf, so only buf and str themselves are freed.
 * The pointer array starts at 64 entries and doubles whenever it is full.
 * Exits with status 1 on a usage, I/O or allocation error.
 */
int main(int argc, char *argv[])
{
    FILE *f;
    char *buf;      /* Buffer that holds the file's contents */
    size_t size;    /* Size of that buffer */
    long len;       /* File length as reported by ftell */
    char **str;     /* Array of pointers to words in that buffer */
    int n;          /* number of words */
    int nalloc;     /* For how many words space is allocated */
    int i;

    if (argc != 2) {
        fprintf(stderr, "Usage: $fifo file_name.ip\n");
        exit(1);
    }

    f = fopen(argv[1], "r");
    if (f == NULL) {
        fprintf(stderr, "Can't open file\n");
        exit(1);
    }

    /* ftell returns -1 on failure; converted to size_t unchecked that would
       become an enormous size */
    if (fseek(f, 0, SEEK_END) != 0 || (len = ftell(f)) < 0
        || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Can't determine file size\n");
        fclose(f);
        exit(1);
    }
    size = (size_t)len;

    buf = malloc(size + 1);
    if (buf == NULL) {
        fprintf(stderr, "Allocation failed\n");
        fclose(f);
        exit(1);
    }

    /* read whole file; fread may deliver fewer bytes than requested, so the
       terminator goes after what was actually read */
    size = fread(buf, 1, size, f);
    buf[size] = '\0';
    fclose(f);

    n = 0;
    nalloc = 0;
    str = NULL;

    for (;;) {
        if (n >= nalloc) {
            /* reallocate through a temporary so str stays valid on failure */
            char **grown;

            nalloc = nalloc * 2;
            if (nalloc == 0) nalloc = 64;
            grown = realloc(str, nalloc * sizeof(*str));
            if (grown == NULL) {
                fprintf(stderr, "Reallocation failed\n");
                free(str);
                free(buf);
                exit(1);
            }
            str = grown;
        }

        /* first call passes the buffer, later calls continue with NULL */
        str[n] = strtok(n ? NULL : buf, " \t\n\r");
        if (str[n] == NULL) break;
        n++;
    }

    /* process words */
    for (i = 0; i < n; i++) {
        printf("%4d: '%s'\n", i, str[i]);
    }

    free(buf);
    free(str);

    return 0;
}
This approach is more efficient, but also more complicated. Note how many variables I need to keep track of everything: the allocated size, the actual size, the size of the text buffer. And I have to take care of two allocated arrays.
Given that you want to read two files, it makes sense to pack these variables into a structure and read each file into such a structure.
Conclusion
These are only two of many ways to read words from a file. Both are not trivial and require that you understand how to manage memory.
I think one of the most basic things to learn is that a pointer may be used for many different things. It can just point to existing memory, whether that has been allocated or is an automatic array. But it can also be used as a handle to allocated memory; it will then behave like an array, except that you have to free the memory after use. You should not "move" such pointers, i.e. change the address they point to.
Both kinds of pointers look the same in your code, but you have to know which pointer acts as what.
With
char *a1[50];
char *a2[50]; //not used so can remove
You are creating array of char pointers, not array of characters. You may want to just use char pointers as
char *a1;
char *a2;
Then instead of
a1[50] = calloc(1, lSize +1);
do
a1 = calloc(1, lSize +1);
Using a1[50] as in your code is incorrect and will cause undefined behavior (including segmentation fault). The array elements are from 0 to 49, so last element is a1[49].
Also, you can use lSize to read those many characters as below
for (i=0; i <lSize; i++)
{
if (fscanf(file, "%c", &a1[i]) == 1){
printf("%c", a1[i]);
}
}
But may be you can skip the for loop limit and read from file until there is no error.

Segmentation Fault when read textfile to 2d array

I have written some code to read each lines of textfile to 2d array.
/* FileProcess.c library */
#define LINE_SIZE 128 /* Buffer size: fgets stores at most LINE_SIZE-1 = 127 characters per call */
/*
 * Question code: counts the lines of fptr, allocates one pointer per line and
 * copies each line (without its '\n') into a separately allocated row.
 * Returns the counted number of lines, or -1 converted to ulong when fptr is NULL.
 * NOTE(review): `result` is a by-value parameter, so the array allocated here
 * is not visible to the caller through it.
 */
extern ulong
File_ReadLine (FILE *fptr,
char **result)
{
/* p is declared but never used in this function */
char buff_line[LINE_SIZE], *p;
ulong nLines = 0UL;
/* Check if fptr is readable */
if (fptr == NULL) {
printf("File not found.\n");
return -1;
}
/*get number of lines; from http://stackoverflow.com/a/3837983 */
/* A chunk that fills the buffer without ending in '\n' is not counted, so a long line counts once (on its final chunk). */
while (fgets(buff_line, LINE_SIZE, fptr))
if (!(strlen(buff_line) == LINE_SIZE-1 && buff_line[LINE_SIZE-2] != '\n'))
nLines++;
/* Allocating memory for result */
result = malloc(nLines * sizeof(char *)); // assigns only the local copy of the parameter
/* Pointer return to begin of file */
rewind(fptr);
/* Getting lines */
/* Unlike the counting loop, this loop stores every fgets chunk as its own row, so it can produce more rows than nLines when a line is longer than the buffer. */
int i = 0;
while (!feof(fptr)) {
/* Get current line to buff_line */
fgets(buff_line, LINE_SIZE, fptr);
/* Replace '\n' at the end of line */
char *c = strchr(buff_line, '\n');
if (c != NULL)
*c = '\0';
/* Handle '\n' at the end of file */
if (feof(fptr))
break;
/* Allocate a full LINE_SIZE row for this line */
result[i] = malloc (LINE_SIZE * sizeof(char));
/* Copy buff_line into the new row */
strcpy(result[i], buff_line);
i++;
}
return (nLines);
}
main program:
/* Question code: reads phone.na.txt through File_ReadLine and prints the lines. */
int main ()
{
/* Phone is never initialised in this function. */
char **Phone;
FILE *fptr;
fptr = fopen("phone.na.txt", "r");
/* Phone is passed by value, so File_ReadLine cannot change what it points to here. */
ulong nLines = File_ReadLine(fptr, Phone);
printf("%ld\n", nLines);
int i;
for (i = 0; i < nLines; i++) {
/* Phone still holds its indeterminate initial value at this point. */
printf("%s", Phone[i]);
}
fclose(fptr);
return 1;
}
Using gdb, running line by line, program return segmentation fault after
printf("%s", Phone[i]);
So I can't understand why segmentation fault here? Are there any errors with malloc() ?
I haven't compiled or run the code, but I think the problem is in your line counter:
while (fgets(buff_line, LINE_SIZE, fptr))
if (!(strlen(buff_line) == LINE_SIZE-1 && buff_line[LINE_SIZE-2] != '\n'))
nLines++;
What you're saying here is: unless "the string length of buff_line is equal to LINE_SIZE-1 and the character at buff_line[LINE_SIZE-2] is not equal to '\n'", increment nLines.
So... whenever you read a line out of your text file which ends with '\n', and that line is 127 characters long, you're not going to increment nLines. You malloc spaces for nLines, but you're probably going to read more than nLines of data from your file... at that point, you're writing more into **result than you have allocated, and bad things are going to happen.

Resources