Storing each line of a text file into an array - c

I am trying to save each line of a text file into an array.
The way I am doing it, which works fine so far, is this:
char *lines[40];
char line[50];
int i = 0 ;
char* eof ;
while( (eof = fgets(line, 50, in)) != NULL )
{
lines[i] = strdup(eof); /*Fills the array with line of the txt file one by one*/
i++;
}
My text file has 40 lines , which I am accessing with a for loop
for( j = 0; j <= 39 ; j++)
{ /*Do something to each line*/}.
So far so good. My problem is that i define the size of the array lines
for a text file that has 40 lines. I tried to count the lines first and then define the size, but I am getting a segmentation fault.
My approach:
int count=1 ; char c ;
for (c = getc(in); c != EOF; c = getc(in))
if (c == '\n') // Increment count if this character is newline
count = count + 1;
printf("\nNUMBER OF LINES = %d \n",count);
char* lines[count];
Any ideas ?

As an aside, I tested the exact code you show above to get line count (by counting newline characters), on a file containing more than 1000 lines, and with some lines 4000 char long. The problem is not there.
The seg fault is therefore likely due to the way you are allocating memory for each line buffer. You may be attempting to write a long line to a short buffer. (maybe I missed it in your post, but could not find where you addressed line length?)
Two things useful when allocating memory for storing strings in a file are number of lines, and the maximum line length in the file. These can be used to create the array of char arrays.
You can get both line count and longest line by looping on fgets(...): (a variation on your theme, essentially letting fgets find the newlines)
/*
 * Count the lines in fp and report the length of the longest one.
 *
 * fp      - stream opened for reading; it is read from its current position
 *           to end-of-file and is NOT rewound afterwards.
 * longest - receives the largest strlen() of any chunk returned by fgets
 *           (the trailing '\n' is included in that length); 0 if nothing
 *           was read.
 *
 * Returns the number of successful fgets calls. A physical line longer than
 * the 4094 characters that fit in the buffer is therefore counted once per
 * chunk.
 */
int countLines(FILE *fp, int *longest)
{
    char chunk[4095]; // max for C99 strings
    int nlines = 0;

    *longest = 0;
    for (; fgets(chunk, 4095, fp) != NULL; nlines++)
    {
        int len = strlen(chunk);

        if (len > *longest)
            *longest = len; // new longest line so far
    }
    return nlines;
}
/* helpers defined further down need prototypes before main uses them */
char ** create2D(char **a, int lines, int longest);
void free2D(char **a, int lines);

/*
 * Example driver: count the lines of a text file, allocate one buffer per
 * line sized for the longest line, then release everything again.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(void)
{
    int longest = 0;
    int count = 0;              // declared here so it is visible after the read
    char **strArr = NULL;
    FILE *fp = fopen("C:\\dev\\play\\text.txt", "r");

    if(!fp)
    {
        perror("fopen");
        return 1;
    }
    count = countLines(fp, &longest);
    printf("%d", count);
    GetKey();                   // NOTE(review): not defined in this listing - presumably a "wait for key" helper
    rewind(fp);                 // countLines left the stream at end-of-file

    // use count and longest to create memory
    strArr = create2D(strArr, count, longest);
    if(strArr)
    {
        //use strArr ... e.g. fgets(strArr[i], longest + 1, fp) for each i < count
        //free strArr
        free2D(strArr, count);
    }
    fclose(fp);
    // ......and so on
    return 0;
}
/*
 * Allocate an array of `lines` string buffers, each able to hold `longest`
 * characters plus the terminating nul.
 *
 * a       - ignored on input (kept for interface compatibility); the new
 *           array is returned, not written through this parameter.
 * lines   - number of buffers to allocate.
 * longest - longest string each buffer must hold, not counting the nul.
 *
 * Returns the new array, or NULL if a size is negative or any allocation
 * fails. On failure nothing is leaked: rows allocated so far are released
 * before returning. The caller releases a successful result with free2D().
 */
char ** create2D(char **a, int lines, int longest)
{
    int i;

    if(lines < 0 || longest < 0) return NULL;

    a = malloc((size_t)lines * sizeof *a);
    if(!a) return NULL;

    for(i = 0; i < lines; i++)
    {
        a[i] = malloc((size_t)longest + 1);
        if(!a[i])
        {
            while(i-- > 0) free(a[i]);  // undo the rows that did succeed
            free(a);
            return NULL;
        }
    }
    return a;
}
/*
 * Release an array created by create2D().
 *
 * a     - array of `lines` allocated buffers; may be NULL, in which case
 *         nothing is done.
 * lines - number of buffers stored in a.
 */
void free2D(char **a, int lines)
{
    int i;

    if(!a) return;              // must be checked before a[i] is read

    for(i = 0; i < lines; i++)
    {
        free(a[i]);             // free(NULL) is a no-op, no guard needed
    }
    free(a);
}

There are many ways to approach this problem. Either declare a static 2D array or char (e.g. char lines[40][50] = {{""}};) or declare a pointer to array of type char [50], which is probably the easiest for dynamic allocation. With that approach you only need a single allocation. With constant MAXL = 40 and MAXC = 50, you simply need:
char (*lines)[MAXC] = NULL;
...
lines = malloc (MAXL * sizeof *lines);
Reading each line with fgets is a simple task of:
while (i < MAXL && fgets (lines[i], MAXC, fp)) {...
When you are done, all you need to do is free (lines); Putting the pieces together, you can do something like:
#include <stdio.h>
#include <stdlib.h>
enum { MAXL = 40, MAXC = 50 };
/*
 * Read up to MAXL lines (at most MAXC - 1 chars each) from the file named by
 * the first argument, or from stdin when no argument is given, strip the
 * trailing newline from each, and print them numbered from 1.
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main (int argc, char **argv) {

    char (*lines)[MAXC] = NULL;     /* one block holding MAXL rows of MAXC chars */
    int i, n = 0;                   /* n counts the lines actually stored */
    FILE *fp = stdin;

    if (argc > 1)
        fp = fopen (argv[1], "r");
    if (fp == NULL) {               /* only reachable when argv[1] was used */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    lines = malloc (MAXL * sizeof *lines);
    if (lines == NULL) {
        fprintf (stderr, "error: virtual memory exhausted 'lines'.\n");
        return 1;
    }

    while (n < MAXL) {
        char *p;

        if (fgets (lines[n], MAXC, fp) == NULL)
            break;                  /* end of input or read error */
        p = lines[n];
        while (*p != '\0' && *p != '\n')
            p++;                    /* stop at the newline or end of string */
        *p = '\0';                  /* cut the string there */
        n++;
    }

    if (fp != stdin)
        fclose (fp);

    i = 0;
    while (i < n) {                 /* report every stored line */
        printf (" line[%2d] : '%s'\n", i + 1, lines[i]);
        i++;
    }

    free (lines);
    return 0;
}
note: you will also want to check whether the whole line was read by fgets each time (say you had a long line of more than 48 chars in the file, which cannot fit in MAXC = 50 together with its '\n' and the nul-terminating character). You do this by checking whether *p is '\n' before overwriting it with the nul-terminating character, and if it is not, reading and discarding the rest of the line from the same stream (e.g. if (*p != '\n') { int c; while ((c = fgetc (fp)) != '\n' && c != EOF) {} }). That ensures the next read with fgets will begin with the next line, instead of the remaining characters in the current line.
To include the check you could do something similar to the following (note: I changed the read loop counter from i to n to eliminate the need for assigning n = i; following the read loop).
/* store up to MAXL lines; any part of a line that does not fit in MAXC is dropped */
while (n < MAXL && fgets (lines[n], MAXC, fp)) { /* read each line */
char *p = lines[n]; /* start scanning at the beginning of the stored line */
for (; *p && *p != '\n'; p++) {} /* advance to the 1st '\n' or the end of the string */
if (*p != '\n') { /* no newline stored: fgets stopped before the end of the line (or at EOF) */
int c; /* read and discard the remainder of the line from fp with fgetc */
while ((c = fgetc (fp)) != '\n' && c != EOF) {}
}
*p = 0, n++; /* nul-terminate at the newline position, count the line */
}
It is up to you whether you discard or keep the remainder of lines that exceed the length of your array. However, it is a good idea to always check. (The lines of text in my example input below are limited to 17 chars, so there was no possibility of a long line, but you generally cannot guarantee the line length.)
Example Input
$ cat dat/40lines.txt
line of text - 1
line of text - 2
line of text - 3
line of text - 4
line of text - 5
line of text - 6
...
line of text - 38
line of text - 39
line of text - 40
Example Use/Output
$ ./bin/fgets_ptr2array <dat/40lines.txt
line[ 1] : 'line of text - 1'
line[ 2] : 'line of text - 2'
line[ 3] : 'line of text - 3'
line[ 4] : 'line of text - 4'
line[ 5] : 'line of text - 5'
line[ 6] : 'line of text - 6'
...
line[38] : 'line of text - 38'
line[39] : 'line of text - 39'
line[40] : 'line of text - 40'
Now include the length check in the code and add a long line to the input, e.g.:
$ cat dat/40lines+long.txt
line of text - 1
line of text - 2
line of text - 3 + 123456789 123456789 123456789 123456789 65->|
line of text - 4
...
Rerun the program and you can confirm you have now protected against long lines in the file mucking up your sequential read of lines from the file.
Dynamically Reallocating lines
If you have an unknown number of lines in your file and you reach your initial allocation of 40 in lines, then all you need do to keep reading additional lines is realloc storage for lines. For example:
int i, n = 0, maxl = MAXL;
...
while (fgets (lines[n], MAXC, fp)) { /* read each line */
char *p = lines[n]; /* assign pointer */
for (; *p && *p != '\n'; p++) {} /* find 1st '\n' */
*p = 0; /* nul-termiante */
if (++n == maxl) { /* if limit reached, realloc lines */
void *tmp = realloc (lines, 2 * maxl * sizeof *lines);
if (!tmp) { /* validate realloc succeeded */
fprintf (stderr, "error: realloc - virtual memory exhausted.\n");
break; /* on failure, exit with existing data */
}
lines = tmp; /* assign reallocated block to lines */
maxl *= 2; /* update maxl to reflect new size */
}
}
Now it doesn't matter how many lines are in your file: you will simply keep reallocating lines until your entire file is read, or you run out of memory. (note: currently the code reallocates twice the current memory for lines on each reallocation. You are free to add as much or as little as you like. For example, you could allocate maxl + 40 to simply allocate 40 more lines each time.)
Edit In Response To Comment Inquiry
If you do want to use a fixed increase in the number of lines rather than scaling by some factor, you must allocate for a fixed number of additional lines (the increase times sizeof *lines); you can't simply add 40 bytes, e.g.
void *tmp = realloc (lines, (maxl + 40) * sizeof *lines);
if (!tmp) { /* validate realloc succeeded */
fprintf (stderr, "error: realloc - virtual memory exhausted.\n");
break; /* on failure, exit with existing data */
}
lines = tmp; /* assign reallocated block to lines */
maxl += 40; /* update maxl to reflect new size */
}
Recall, lines is a pointer-to-array of char[50], so for each additional line you want to allocate, you must allocate storage for 50-char (e.g. sizeof *lines), so the fixed increase by 40 lines will be realloc (lines, (maxl + 40) * sizeof *lines);, then you must accurately update your max-lines-allocated count (maxl) to reflect the increase of 40 lines, e.g. maxl += 40;.
Example Input
$ cat dat/80lines.txt
line of text - 1
line of text - 2
...
line of text - 79
line of text - 80
Example Use/Output
$ ./bin/fgets_ptr2array_realloc <dat/80lines.txt
line[ 1] : 'line of text - 1'
line[ 2] : 'line of text - 2'
...
line[79] : 'line of text - 79'
line[80] : 'line of text - 80'
Look it over and let me know if you have any questions.

Related

How to fscanf word by word in a file?

I have a file with a series of words separated by a white space. For example file.txt contains this: "this is the file". How can I use fscanf to take word by word and put each word in an array of strings?
Then I did this but I don't know if it's correct:
char *words[100];
int i=0;
while(!feof(file)){
fscanf(file, "%s", words[i]);
i++;
fscanf(file, " ");
}
When reading repeated input, you control the input loop with the input function itself (fscanf in your case). While you can also loop continually (e.g. for (;;) { ... }) and check independently whether the return is EOF, whether a matching failure occurred, or whether the return matches the number of conversion specifiers (success), in your case simply checking that the return matches the single "%s" conversion specifier is fine (e.g. that the return is 1).
Storing each word in an array, you have several options. The most simple is using a 2D array of char with automatic storage. Since the longest non-medical word in the Unabridged Dictionary is 29-characters (requiring a total of 30-characters with the nul-terminating character), a 2D array with a fixed number of rows and fixed number of columns of at least 30 is fine. (dynamically allocating allows you to read and allocate memory for as many words as may be required -- but that is left for later.)
So to set up storage for 128 words, you could do something similar to the following:
#include <stdio.h>
#define MAXW 32 /* if you need a constant, #define one (or more) */
#define MAXA 128
int main (int argc, char **argv) {
char array[MAXA][MAXW] = {{""}}; /* array to store up to 128 words */
size_t n = 0; /* word index */
Now simply open your filename provided as the first argument to the program (or read from stdin by default if no argument is given), and then validate that your file is open for reading, e.g.
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
Now to the crux of your read-loop. Simply loop checking the return of fscanf to determine success/failure of the read, adding words to your array and incrementing your index on each successful read. You must also include in your loop-control a check of your index against your array bounds to ensure you do not attempt to write more words to your array than it can hold, e.g.
/* "%31s" stores at most MAXW - 1 chars plus the nul, so a word can never
 * overflow its row; a longer word continues in the next array element */
while (n < MAXA && fscanf (fp, "%31s", array[n]) == 1)
    n++;
That's it, now just close the file and use your words stored in your array as needed. For example just printing the stored words you could do:
if (fp != stdin) fclose (fp); /* close file if not stdin */
for (size_t i = 0; i < n; i++)
printf ("array[%3zu] : %s\n", i, array[i]);
return 0;
}
Now just compile it, With Warnings Enabled (e.g. -Wall -Wextra -pedantic for gcc/clang, or /W3 on (VS, cl.exe) and then test on your file. The full code is:
#include <stdio.h>
#define MAXW 32 /* if you need a constant, #define one (or more) */
#define MAXA 128
/*
 * Read whitespace-separated words from the file named by the first argument
 * (stdin when no argument is given) into a fixed 2D array and print them
 * with their index. Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {
    char array[MAXA][MAXW] = {{""}};    /* array to store up to 128 words */
    size_t n = 0;                       /* word index */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    /* The field width must be MAXW - 1 (31): a bare "%s" writes past the end
     * of array[n] for any longer word. With the width in place the excess
     * characters of an over-long word are stored as the following word(s). */
    while (n < MAXA && fscanf (fp, "%31s", array[n]) == 1)
        n++;

    if (fp != stdin) fclose (fp);       /* close file if not stdin */

    for (size_t i = 0; i < n; i++)
        printf ("array[%3zu] : %s\n", i, array[i]);

    return 0;
}
Example Input File
$ cat dat/thefile.txt
this is the file
Example Use/Output
$ ./bin/fscanfsimple dat/thefile.txt
array[ 0] : this
array[ 1] : is
array[ 2] : the
array[ 3] : file
Look things over and let me know if you have further questions.
strtok() might be a function that can help you here.
If you know that the words will be separated by whitespace, then calling strtok will return the char pointer to the start of the next word.
Sample code from https://www.systutorials.com/docs/linux/man/3p-strtok/
#include <string.h>
...
char *token;
char *line = "LINE TO BE SEPARATED";
char *search = " ";
/* Token will point to "LINE". */
token = strtok(line, search);
/* Token will point to "TO". */
token = strtok(NULL, search);
In your case, the space character would also act as a delimiter in the line.
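Note that strtok modifies the string passed in (it overwrites each delimiter it finds with a nul-character), so the string must be writable: a string literal such as the one in the sample above has to be copied into a char array, or into memory obtained with something like malloc, before it is tokenized. For the same reason, make a deep copy first if you still need the original string afterwards.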
It might also be easier to use fread() to read a block from a file
As mentioned in comments, using feof() does not work as would be expected. And, as described in this answer unless the content of the file is formatted with very predictable content, using any of the scanf family to parse out the words is overly complicated. I do not recommend using it for that purpose.
There are many other, better ways to read content of a file, word by word. My preference is to read each line into a buffer, then parse the buffer to extract the words. This requires determining those characters that may be in the file, but would not be considered part of a word. Characters such as \n,\t, (space), -, etc. should be considered delimiters, and can be used to extract the words. The following is a recipe for extracting words from a file: (example code for a few of the items is included below these steps.)
Read file to count words, and get the length of the longest word.
Use count, and longest values from 1st step to allocate memory for words.
Rewind the file.
Read file line by line into a line buffer using while(fgets(line, size, fp))
Parse each new line into words using delimiters and store each word into arrays of step 2.
Use resulting array of words as necessary.
free all memory allocated when finished with arrays
Some example of code to do some of these tasks:
// Get count of words, and longest word in file
//
// A word is a run of alphanumeric characters; punctuation, whitespace and
// nul bytes end a word. Any other byte is skipped without ending the word.
//
// file   - path of the file to scan
// nWords - receives the number of words found (left untouched on failure)
//
// Returns the length of the longest word, 0 if the file holds no words,
// or -1 if the file cannot be opened.
int longestWord(const char *file, int *nWords)
{
    FILE *fp = fopen(file, "r");
    int cnt = 0, longest = 0, numWords = 0;
    int c;

    if(!fp) return -1;

    while ( (c = fgetc(fp) ) != EOF )
    {
        if ( isalnum (c) ) cnt++;
        else if ( ( ispunct (c) ) || ( isspace(c) ) || (c == '\0' ))
        {
            if (cnt > 0)    // consecutive delimiters must not count as words
            {
                if (cnt > longest) longest = cnt;
                numWords++;
                cnt = 0;
            }
        }
    }
    if (cnt > 0)            // last word when the file does not end in a delimiter
    {
        if (cnt > longest) longest = cnt;
        numWords++;
    }
    *nWords = numWords;
    fclose(fp);
    return longest;
}
// Create indexable memory for word arrays
//
// numStrings - number of strings to allocate
// maxStrLen  - longest string to store; each buffer gets maxStrLen + 1
//              zero-filled bytes, so the nul terminator is already included
//
// Returns the array, or NULL if an argument is negative or an allocation
// fails (rows allocated before the failure are released). The caller frees
// each a[i] and then a.
char ** Create2DStr(ssize_t numStrings, ssize_t maxStrLen)
{
    ssize_t i;
    char **a;

    if(numStrings < 0 || maxStrLen < 0) return NULL;

    a = calloc((size_t)numStrings, sizeof *a);
    if(!a) return NULL;

    for(i = 0; i < numStrings; i++)
    {
        a[i] = calloc((size_t)maxStrLen + 1, 1);
        if(!a[i])
        {
            while(i-- > 0) free(a[i]);  // release what was already allocated
            free(a);
            return NULL;
        }
    }
    return a;
}
Usage: For a file with 25 words, the longest being 80 bytes:
char **strArray = Create2DStr(25, 80+1);//creates 25 array locations
//each 80+1 characters long
//(+1 is room for null terminator.)
int i=0;
char words[50][50];
while(fscanf(file, " %s ", words[i]) != EOF)
i++;
I wouldn't entirely recommend doing it this way, because of the unknown amount of words in the file, and the unknown length of a "word". Either can be over the size of '50'. Just do it dynamically, instead. Still, this should show you how it works.
How can I use fscanf to take word by word and put each word in an array of strings?
Read each word twice: first to find length via "%n". 2nd time, save it. (Inefficient yet simple)
Re-size strings as you go. Again inefficient, yet simple.
// Rough sample code - the results of ftell/fseek/fscanf still need checking.
// Each word is scanned twice: once to measure it, once to store it.
size_t string_count = 0;
char **strings = NULL;
for (;;) {
  long pos = ftell(file);
  int n = 0;
  fscanf(file, "%*s%n", &n); // record where scanning a "word" stopped
  if (n == 0) break;         // nothing scanned: no more words
  fseek(file, pos, SEEK_SET); // go back;
  // grow through a temporary so the existing array survives a failed realloc
  char **grown = realloc(strings, sizeof *strings * (string_count+1));
  if (grown == NULL) break;
  strings = grown;
  strings[string_count] = malloc(n + 1u); // Get enough memory for the word
  if (strings[string_count] == NULL) break;
  fscanf(file, "%s ", strings[string_count] ); // read/save word
  string_count++;            // without this every word lands in strings[0]
}
// use strings[], string_count
// When done, free each strings[] and then strings

Reading input for more than one array, line by line,without knowing the number of inputs to be given

I was given 3 arrays and the input for each array is given in a single line with space between each element.
Example input:
3 2 1 1 1
4 3 2
1 1 4 1
So what I am trying to do is to assign all the elements of first line to array 1, second line to array 2 and third line to array 3.
#include <stdio.h>
/* Read three lines of space-separated integers from stdin into a, b and c. */
int main()
{
/* a, b, c: storage for up to 20 values per input line;
   d: number of values stored in a, b and c respectively */
int a[20],b[20],c[20],d[3];
int k=0; /* index of the next free slot in the array being filled */
char temp; /* character that immediately follows each number */
/* first line -> a: read "number + following char" until that char is '\n';
   note that k is never checked against 20 and the scanf result is not checked */
do{
scanf("%d%c", &a[k], &temp);
k++;
} while(temp != '\n');
d[0]=k; /* count of values read into a */
k=0;
/* second line -> b, same scheme */
do{
scanf("%d%c", &b[k], &temp);
k++;
} while(temp != '\n');
d[1]=k; /* count of values read into b */
k=0;
/* third line -> c, same scheme */
do{
scanf("%d%c", &c[k], &temp);
k++;
} while(temp != '\n');
d[2]=k; /* count of values read into c */
return 0;
}
This is what I tried, but this code saves all the elements in the first array itself. Any help?
I've just tried your code and it works fine - a, b, c are filled with the numbers entered via stdin.
However, your primary problem is that scanf is not line oriented. You should instead use fgets to read the line in a string and parse it with strtok and sscanf.
Taking the recommendation to use fgets is one thing, putting it into use the first time is quite another. You use fgets (or POSIX getline) because they provide a mechanism for reading an entire line of text into a buffer at once. This eliminates the pitfalls inherent in trying to use scanf for that purpose.
While POSIX getline will handle a line of any length for you, it dynamically allocates storage for the resulting buffer. fgets on the other hand will read only as many characters as can be stored in the size you specify in the fgets call (reserving space for the nul-character, as fgets always provides a nul-terminated buffer)
This means it is up to you to check that a complete line fit into the buffer you provided for fgets use. Essentially you want to check whether the buffer is full and the last character is not the '\n' character. Note, you are not concerned with trimming the trailing newline here, just in checking for its presence to validate whether a complete line was read. So here you can check whether the length of buffer is your max size (minus 1 for the nul-character) and the last character is not '\n'. If those two conditions exist, you have no way of knowing whether the entire line was read (but see the note after this example). A simple approach to the validation whether a full line was read into buf is, e.g.
while (fgets (buf, MAXC, fp)) {
...
size_t len = strlen (buf); /* length for line validation */
/* validate whole line read into buf - exit on error */
if (len == MAXC - 1 && buf[len - 1] != '\n') {
fprintf (stderr, "error: line %d too long.\n", row + 1);
return 1;
}
(note: for the corner-case of a file without a POSIX end-of-line after the last line of text (e.g. without a '\n' following the last line), there is a chance you could actually read an exact buffer full of characters and have no trailing '\n', but still have a complete read -- you can check for this by reading one more character from the stream with fgetc (fp): if it is EOF the line was complete, otherwise return the character to the stream with ungetc)
Now on to handling your arrays. Rather than declaring separate arrays of 20 int each, instead declare a 2D array of n row of 20 int each. This makes handling the read and indexing much easier.
You also have the problem of having to capture the number of values you store in each row. While you can do a little indexing magic and store the number of values in each row as the first-column value, it is probably a bit easier just to have a separate array of n values where each index corresponds to the number of values store for each row in your 2D array. For example,
int row = 0, /* row count during read */
idx[ROWS] = {0}, /* array holding col count per row */
arr[ROWS][COLS] = {{0}}; /* 2D array holding each line array */
That way, each time you add a value to one of your rows, you simply increment the corresponding value in idx, e.g.
/* fill a value in row, then */
idx[row]++; /* update col-index for array */
With that background, you are finally ready to start filling your array. The approach is straight-forward. You will:
use an outer loop reading a complete line using fgets (buf, MAXC, fp);
initialize inner loop variable (for offset, etc.);
check that a complete line was read (as shown above);
use an inner loop over buf using sscanf to repeatedly parse a single-integer from buf until all integers are read;
(really 4(a.)) (you call sscanf on buf + offset from the beginning), saving the number characters consumed (saved with the %n format specifier to update offset);
update offset with the number of characters consumed, and repeat.
(note: it is up to you to protect your array bounds to make sure you do not attempt to store more integer values in each array than you have storage for, and that you do not try and store more rows than you have storage for. So on each the outer and inner loop you will add a check to limit the number of rows and columns you read to the available storage)
Your read loops implementing the steps above could look like the following:
/* constants for max rows, cols, and chars for read buf */
enum { ROWS = 4, COLS = 20, MAXC = 512 };
...
while (row < ROWS && fgets (buf, MAXC, fp)) { /* read each line */
int col = 0, /* col being filled */
nchr = 0, /* no. chars consumed by sscanf */
offset = 0, /* offset in buf for next sscaf call */
tmp = 0; /* temp var to hold sscanf conversion */
size_t len = strlen (buf); /* length for line validation */
/* validate whole line read into buf - exit on error */
if (len == MAXC - 1 && buf[len - 1] != '\n') {
fprintf (stderr, "error: line %d too long.\n", row + 1);
return 1;
}
while (col < COLS && /* read each value in line into arr */
sscanf (buf + offset, "%d%n", &tmp, &nchr) == 1) {
arr[row][col++] = tmp; /* assign tmp to array */
offset += nchr; /* update offset in buffer */
idx[row]++; /* update col-index for array */
}
row++; /* increment row for next read */
}
Putting it altogether, you could do something like the following:
#include <stdio.h>
#include <string.h>
/* constants for max rows, cols, and chars for read buf */
enum { ROWS = 4, COLS = 20, MAXC = 512 };
/*
 * Read up to ROWS lines of integers (at most COLS per line) from the file
 * named by the first argument, or stdin when none is given, and print each
 * row back. Returns 0 on success, 1 if the file cannot be opened or a line
 * does not fit in the read buffer.
 */
int main (int argc, char **argv) {

    int arr[ROWS][COLS] = {{0}};    /* values, one row per input line */
    int idx[ROWS] = {0};            /* number of values stored in each row */
    int row = 0;                    /* rows filled so far */
    char buf[MAXC] = "";            /* line buffer for fgets */
    FILE *fp = stdin;

    if (argc > 1)
        fp = fopen (argv[1], "r");
    if (fp == NULL) {               /* only reachable when argv[1] was used */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    for (; row < ROWS && fgets (buf, MAXC, fp) != NULL; row++) {
        const char *cursor = buf;   /* where the next sscanf starts */
        int value = 0,              /* converted integer */
            used = 0,               /* chars consumed by the last sscanf */
            col = 0;                /* next column to fill in this row */
        size_t len = strlen (buf);

        /* a full buffer without a trailing newline means the line was cut */
        if (len == MAXC - 1 && buf[len - 1] != '\n') {
            fprintf (stderr, "error: line %d too long.\n", row + 1);
            return 1;
        }

        while (col < COLS && sscanf (cursor, "%d%n", &value, &used) == 1) {
            arr[row][col] = value;
            col++;
            cursor += used;         /* continue right after this number */
        }
        idx[row] = col;             /* remember how many were stored */
    }

    if (fp != stdin)
        fclose (fp);

    for (int r = 0; r < row; r++) { /* print each row that was read */
        for (int c = 0; c < idx[r]; c++)
            printf (" %3d", arr[r][c]);
        putchar ('\n');
    }

    return 0;
}
Note: rather than using a fixed size 2D array, you can take things a step further and instead use a pointer-to-pointer-to-int (e.g. a double-pointer, int **arr;) and dynamically allocate and reallocate pointers for rows, as required, and dynamically allocate and reallocate the storage assigned to each pointer to handle any number of integer values per-row. While it is not that much additional work, that is left as an exercise to you when you get to dynamic allocation in your studies. What you are doing with an differing number of column values per-row is creating a jagged array.
Example Input File
Using your input file for testing, e.g.:
$ cat dat/3arr.txt
3 2 1 1 1
4 3 2
1 1 4 1
Example Use/Output
Produces the following output:
$ ./bin/arr_jagged dat/3arr.txt
3 2 1 1 1
4 3 2
1 1 4 1
Look things over and let me know if you have further questions.

How to get the strings from a file and store in a 2D char array and compare that 2D char array with a string in C?

I have a text file, it has values(I usually call them as upc_values) of
01080006210
69685932764
40000114485
40000114724
07410855329
72908100004
66484101000
04000049163
43701256600
99999909001
07726009493
78732510053
78732510063
78732510073
78732510093
02842010109
02842010132
78732510213
02410011035
73999911110
char *UPC_val = "99999909001";
char upcbuf[100][12];
char buf[12];
memset(buf,0,sizeof(buf));
memset(upcbuf,0,sizeof(upcbuf));
When I tried to fgets, I stored that in a 2D buffer.
while ( fgets(buf, sizeof(buf), f) != NULL ) {
strncpy(upcbuf[i], buf, 11);
i++;
}
I tried to print the data in the buffer.
puts(upcbuf[0]);
upcbuf[0] has the whole data in a continues stream,
0108000621069685932764400001144854000011472407410855329729081000046648410100004000049163437012566009999990900107726009493787325100537873251006378732510073787325100930284201010902842010132787325102130241001103573999911110
and I want to compare this upc values(11 digit) with another string(11 digit). I used,
if(strncmp(UPC_Val,upcbuf[i],11) == 0)
{
//do stuff here
}
It didn't work properly, I used strstr() too like,
if(strstr(upcbuf[0],UPC_val) != NULL)
{
//do stuff here
}
I am totally unaware of what it is doing, am I doing the comparison properly?
How to do this, any help please?
Thanks in advance.
To read a line of text of 11 digits and a '\n' into a string needs an array of at least 13 to store the string. There is little reason to be so tight. Suggest 2x expected max size
char upcbuf[100][12]; // large enough for 100 * (11 digits and a \0)
...
#define BUF_SIZE (13*2)
char buf[BUF_SIZE];
while (i < 100 && fgets(buf, sizeof buf, f) != NULL ) {
Lop off the potential tailing '\n'
size_t len = strlen(buf);
if (len && buf[len-1] == '\n') buf[--len] = '\0';
Check length and handle that somehow.
if (len != 11) exit(EXIT_FAILURE);
Save/print the data
// strncpy(upcbuf[i], buf, 11); // fails to insure a null character at the end
strcpy(upcbuf[i], buf);
puts(upcbuf[i]); // print the entry just stored, before the index moves on
i++;
To compare strings
if(strcmp(UPC_Val,upcbuf[i]) == 0) {
// strings match
}
If you are still having trouble getting the logic to work after @chux's answer, then here is a short example implementing his suggestions that takes the filename to read as the first argument, and optionally the upc to search for as the second argument (it will search for "99999909001" by default [and in that case you can just read the file in on stdin]).
Note the use of an enum to define global constants for your row and column values. (you can use independent #define ROW 128 and #define COL 32 if you like) If you need constants in your code, define them once, at the top, so if they ever need to change, you have a single convenient place to change the values, rather than having to pick through your code, or perform a global search/replace to change them.
For example, you could put the logic together as follows:
#include <stdio.h>
#include <string.h>
enum { COL = 32, ROW = 128 }; /* an enum is convenient for constants */
/*
 * Load up to ROW lines from the file named by the first argument (stdin when
 * none is given) and report the 1-based line on which the upc given as the
 * second argument (default "99999909001") first appears.
 * Returns 0 whether or not the value is found, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    char upcbuf[ROW][COL] = { "" };         /* stored lines, newline removed */
    char buf[COL] = "";                     /* read buffer for one line */
    const char *upcval = "99999909001";     /* value searched for */
    size_t n = 0;                           /* number of lines stored */
    size_t hit;                             /* index of the match, n if none */
    FILE *fp = stdin;

    if (argc > 2)
        upcval = argv[2];
    if (argc > 1)
        fp = fopen (argv[1], "r");
    if (fp == NULL) {                       /* only reachable when argv[1] was used */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    /* read phase: copy every line into upcbuf without its '\n' */
    for (; n < ROW && fgets (buf, COL, fp) != NULL; n++) {
        size_t len = strlen (buf);

        if (len > 0 && buf[len - 1] == '\n')
            buf[len - 1] = '\0';
        strcpy (upcbuf[n], buf);
    }

    if (fp != stdin)
        fclose (fp);

    /* search phase: first exact match wins */
    hit = n;
    for (size_t i = 0; i < n; i++) {
        if (strcmp (upcbuf[i], upcval) == 0) {
            hit = i;
            break;
        }
    }

    if (hit < n)
        printf ("upcval: '%s' found at line '%zu'.\n", upcval, hit + 1);
    else
        printf ("upcval: '%s' not found in file.\n", upcval);

    return 0;
}
Example Use/Output
$ ./bin/upcbuf dat/upcfile.txt
upcval: '99999909001' found at line '10'.
$ ./bin/upcbuf dat/upcfile.txt 01080006210
upcval: '01080006210' found at line '1'.
$ ./bin/upcbuf dat/upcfile.txt 02410011035
upcval: '02410011035' found at line '19'.
$ ./bin/upcbuf dat/upcfile.txt "not there!"
upcval: 'not there!' not found in file.
Also note that if you were simply searching for a single upc, then you could combine read and search in a single loop, but since you often read as a separate function, and then operate on the data elsewhere in your code, this example simply reads all upc values from the file into your array, and then searches though the array in a separate loop. Look things over, look at all answers, and let us know if you have any further questions.
As a final note, you have checked if the last char is '\n', but what happens if it isn't? You should check if the length is COL-1 indicating that additional characters remain unread in that line and handle the error (or just read and discard the remaining chars). You can do that with an addition similar to the following:
/* test last char '\n', overwrite w/nul-terminating char */
if (len && buf[len - 1] == '\n')
buf[--len] = 0;
else if (len == COL - 1) { /* if no '\n' & len == COL - 1 */
fprintf (stderr, "error: line excces %d chars.\n", COL - 1);
return 1;
}
And, you need to use the else if and check the COL - 1 and not simply use an else there because you may be reading from a file that does not have a POSIX end-of-line (e.g. a new-line character) after the final line of the file. fgets properly reads the final line, even without a POSIX line ending, but there will be no '\n' in buf. So even without the POSIX line ending, the line can be a valid line, and you are guaranteed to have a complete read, so long as the number of characters read (+ the nul-terminating char) does not equal your buffer size.

File Handling + character manipulation

this is my code.
the input numbers are
1234567890
the output of this code should be
(123)456-7890
but the output is different. Any advice or error fixes in my code?
#include <stdio.h>
#include <ctype.h>
/* Asker's attempt to print the digits read from "number.c" as a formatted
   phone number; the comments below describe what the code as written does. */
int main()
{
char ch; /* receives fgetc's int result; NOTE(review): a char may not compare reliably against EOF */
int a[100], s[100], str, k, i; /* a: output buffer, s: input digits - s is never assigned in this function; str is unused */
FILE *fp;
fp = fopen("number.c", "r"); /* the result is not checked for NULL */
while ( ( ch = fgetc(fp) ) != EOF ) /* reads one character per iteration into ch, which the body never uses */
{
k = 0;
/* pre-place the punctuation in a */
a[k] = '(';
a[k+4] = ')';
a[k+8] = '-';
for (i = 0; s[i] != '\0'; i++) /* walks s, which has not been initialised */
{
if (isdigit(s[i]))
{
a[k++] = s[i]; /* k starts at 0, so the first digit overwrites the '(' stored above */
if (k == 3)
{
k++; /* the only skip: index 3 is jumped over; indexes 4 and 8 are not */
}
}
printf("%s", a); /* a is an int array with no terminator written, passed to %s; runs once per character of s */
}
fclose(fp); /* still inside the while body: the file is closed ... */
return 0; /* ... and main returns during the first iteration */
}
}
This looks like an assignment from a first year course in CS. If so, I would say find a TA during office hours and discuss.
There are several issues with the code:
Your outer loop is intending to read a line at a time from a file and populate the s array. It is instead reading a character at a time and populating the ch variable.
As mentioned in the comments, you are not accounting for the "-" when putting characters into the a array.
You are not terminating your string in the a array.
There may be different schools of thought on this in c, but I would make s and a char[] instead of int[].
My advice would be to get out a piece of paper and make spaces for each of your variables. Then read your code line by line and manipulate your variables the way you expect the computer to execute what is written. If you can read what is written, rather than what you expect the code to do, then the issues will become apparent.
/* ugly: The old phone # -- must be exactly 10 decimal digits, with no
         separators and no trailing newline
   nice: The formatted phone #, "(xxx) yyy-zzzz".  The caller supplies the
         buffer; it must hold at least 15 bytes (14 characters plus the
         terminator).  On bad input it receives "Invalid pn!" instead.
*/
#include <ctype.h>
#include <stdio.h>
#include <string.h>
void fmtpn(const char *ugly, char *nice)
{
    enum { PN_DIGITS = 10 };
    size_t i;

    if (strlen(ugly) != (size_t)PN_DIGITS) {
        strcpy(nice, "Invalid pn!");
        return;
    }
    /* reject anything that is not a digit (letters, punctuation, spaces);
       the cast keeps isdigit() defined for negative char values */
    for (i = 0; i < (size_t)PN_DIGITS; ++i) {
        if (!isdigit((unsigned char)ugly[i])) {
            strcpy(nice, "Invalid pn!");
            return;
        }
    }
    /* "%.Ns" copies at most N characters, so the three groups can be taken
       straight from the input without temporary buffers */
    sprintf(nice, "(%.3s) %.3s-%.4s", ugly, ugly + 3, ugly + 6);
}
To read from the file:
FILE *fp;
char ugly[32], good[32];
if (fp = fopen("file", "r")) {
fgets(ugly, 32, fp);
fmtpn(ugly, good);
puts(good);
}
No love for sscanf?
#include <stdio.h>
/*
 * Format ten decimal digits read from `input` as "(ddd) ddd-dddd".
 *
 * input:  string to parse; each "%1d" conversion takes one digit (sscanf
 *         skips white space before each of them).
 * output: caller-supplied buffer of at least 15 bytes; receives the
 *         formatted number, or an empty string when parsing fails.
 *
 * Returns 0 on success, -1 (after printing "invalid input" to stderr) when
 * fewer than ten digits could be read.
 */
int prettyprint(const char *input, char *output)
{
    int n[10];
    int ret = sscanf(input, "%1d%1d%1d%1d%1d%1d%1d%1d%1d%1d",
                     &n[0], &n[1], &n[2], &n[3], &n[4],
                     &n[5], &n[6], &n[7], &n[8], &n[9]);

    if (ret != 10) {
        /* n[] is only partly filled; formatting it would read
           uninitialised values, so report the failure and stop */
        fprintf(stderr, "invalid input\n");
        output[0] = '\0';
        return -1;
    }

    sprintf(output, "(%d%d%d) %d%d%d-%d%d%d%d",
            n[0], n[1], n[2],
            n[3], n[4], n[5],
            n[6], n[7], n[8], n[9]);
    return 0;
}
/* Demo driver: format a fixed digit string and print the result. */
int main(void)
{
    char digits[] = "0123456789";
    char output[256];

    /* prettyprint() reports failure through its return value; do not print
       the buffer unless formatting succeeded */
    if (prettyprint(digits, output) != 0)
        return 1;

    printf("%s\n", output);
    return 0;
}
You have other options aside from looping through your string to build the phone number. Sometimes, when dealing with fixed strings or known quantities, a straightforward packing of the characters into a fixed format is a lot simpler than picking the characters out of loops.
For example, here you know you are dealing with a 10 char string of digits. In your code you can read/parse each line into a string of 10 digits. Then your only task is to format those 10 digits into the phone number. Using a pointer for each string and then strncpy is about as easy as anything else:
#include <stdio.h>
#include <string.h>
/* Pack a fixed 10-digit string into "(xxx) yyy-zzzz" and print both forms. */
int main (void)
{
    const char *digits = "1234567890";
    char phonenum[15] = {0};    /* 14 formatted characters + terminator */
    size_t out = 0;             /* next free position in phonenum */

    /* area code */
    phonenum[out++] = '(';
    memcpy (phonenum + out, digits, 3);
    out += 3;
    phonenum[out++] = ')';
    phonenum[out++] = ' ';      /* note: included space, remove line if unwanted */

    /* exchange */
    memcpy (phonenum + out, digits + 3, 3);
    out += 3;
    phonenum[out++] = '-';

    /* subscriber number */
    memcpy (phonenum + out, digits + 6, 4);
    out += 4;
    phonenum[out] = 0;

    printf ("\n digits : %s\n phone : %s\n\n", digits, phonenum);

    return 0;
}
Output
$ ./bin/phnumbld
digits : 1234567890
phone : (123) 456-7890
You can easily turn the code above into a simple function that creates a formatted phone number given any 10-digit string. Breaking your code down into functional pieces not only makes your code easier to read and write, but it also builds flexibility and ease of maintenance into your code. Were you dealing with an actual dial-string that included the international dialing prefix and country code, you could easily format the last 10 digits of the longer string by using a pointer to the appropriate beginning character.
With File Handling
Writing anything in C is no different. You simply break the problem down into discrete operations and then write small bits of code to handle each part of the problem. As you get more experience, you will build a collection of routines to handle most situations.
The code below declares three constants: ACPN (area code phone number length), MAXC (maximum digits in dial string including country code and international dialing prefix), and MAXS (maximum number of chars in line to read from file).
Your options for reading lines of data in C are broken into two broad categories, character oriented input and line oriented input. When reading lines from a file, in most cases line oriented input is the proper choice. You read a line of data at a time into a buffer, then you parse the information you need from the buffer. Your primary choices for line oriented input in C are fgets and getline. We use the standard fgets below.
Below, the code will read a line of data, then call get_n_digits to extract up to MAXC - 1 digits from the line into a separate buffer holding the digits (numstr). The number string is then passed to fmt_phone, which takes the last 10 digits in the string (discarding any initial country-code or int'l dialing prefix) and formats those digits into a telephone number format. You can adjust any part as needed to meet your input file:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ACPN 10
#define MAXC 16
#define MAXS 256
size_t strip_newline (char *s);
char *get_n_digits (char *numstr, char *s, size_t n);
char *fmt_phone (char *fmts, char *s, size_t n);
/*
 * Read lines from the file named by argv[1] (or from stdin when no argument
 * is given), pull the digits out of each line and print the formatted number.
 */
int main (int argc, char **argv)
{
    char line[MAXS] = {0};      /* full input line */
    char numstr[MAXC] = {0};    /* digits extracted from the line (max MAXC - 1) */
    char fmtstr[MAXC] = {0};    /* formatted phone number */
    FILE *fp = stdin;

    if (argc > 1)
        fp = fopen (argv[1], "r");

    if (fp == NULL) {
        fprintf (stderr, "error: failed to open file for reading\n");
        return 1;
    }

    /* one pass per input line (each read is limited to MAXS chars) */
    while (fgets (line, sizeof line, fp) != NULL) {
        size_t len = strip_newline (line);

        get_n_digits (numstr, line, MAXC);
        printf ("\n read : %s (%zu chars), taking last 10 of : %s\n",
                line, len, numstr);

        fmt_phone (fmtstr, numstr, ACPN);
        printf (" phone : %s\n", fmtstr);
    }

    if (fp != stdin)
        fclose (fp);

    return 0;
}
/* Remove any trailing '\n' / '\r' characters from s in place.
   Returns the length of s after stripping.  A string without a line
   ending (e.g. the last line of a file with no final newline) and the
   empty string are left untouched. */
size_t strip_newline (char *s)
{
    size_t len = strlen (s);

    /* test len first so an empty string never indexes s[-1] */
    while (len > 0 && (s[len - 1] == '\n' || s[len - 1] == '\r'))
        s[--len] = 0;

    return len;
}
/* Extract up to n - 1 digits from string s and copy them to numstr.
   n is the size of numstr in bytes, terminator included; non-digit
   characters in s are skipped.  Returns numstr.
   With n == 0 (or a NULL numstr) nothing is written; a NULL s yields an
   empty string. */
char *get_n_digits (char *numstr, char *s, size_t n)
{
    size_t idx = 0;

    /* n - 1 below would wrap around for n == 0 and remove the bound */
    if (!numstr || n == 0)
        return numstr;

    if (s) {
        for (const char *p = s; *p && idx < n - 1; p++) {
            if (*p >= '0' && *p <= '9')
                numstr[idx++] = *p;
        }
    }
    numstr[idx] = 0;

    return numstr;
}
/* format last n (10) digits in s into a formatted
   telephone number: (xxx) yyy-zzzz, copy to fmts.
   'last 10' accounts for country code and international
   dialing prefix at beginning of dial string.

   fmts: caller-supplied buffer of at least 15 bytes
         (14 formatted characters plus the terminator).
   s:    digit string; must hold at least n characters.
   n:    number of trailing characters of s to start from; must be
         at least ACPN.  The first ACPN of those are formatted.

   Returns fmts.  On error a message goes to stderr and fmts is set
   to the empty string (when fmts itself is NULL, NULL is returned
   and nothing is written).
*/
char *fmt_phone (char *fmts, char *s, size_t n)
{
    /* validate strings: a NULL output buffer cannot even take an
       empty string, so it must be handled before any write */
    if (!fmts) {
        fprintf (stderr, "%s() error: invalid string parameter.\n", __func__);
        return fmts;
    }
    if (!s) {
        fprintf (stderr, "%s() error: invalid string parameter.\n", __func__);
        *fmts = 0;
        return fmts;
    }

    /* validate length of n */
    if (n < ACPN) {
        fprintf (stderr, "%s() error: insufficient size 'n' for format.\n", __func__);
        *fmts = 0;
        return fmts;
    }

    /* validate length of s */
    size_t len = strlen (s);
    if (len < n) {
        fprintf (stderr, "%s() error: insufficient digits in string.\n", __func__);
        *fmts = 0;
        return fmts;
    }

    /* set start pointer to the trailing n characters (len >= n here) */
    const char *p = s + (len - n);
    char *pf = fmts;

    /* build formatted phone number; memcpy is enough because the
       length check above guarantees the source bytes exist */
    *pf++ = '(';
    memcpy (pf, p, 3);
    pf += 3, p += 3;
    *pf++ = ')';
    *pf++ = ' ';
    memcpy (pf, p, 3);
    pf += 3, p += 3;
    *pf++ = '-';
    memcpy (pf, p, 4);
    pf += 4;
    *pf = 0;

    return fmts;
}
Compile with gcc -Wall -Wextra -o progname sourcename.c
Example Input
$ cat dat/pnumtest.txt
123456789012345
12345678901234
1234567890123
123456789012
12345678901
1234567890
123456789
Example Output
$ ./bin/phnum dat/pnumtest.txt
read : 123456789012345 (15 chars), taking last 10 of : 123456789012345
phone : (678) 901-2345
read : 12345678901234 (14 chars), taking last 10 of : 12345678901234
phone : (567) 890-1234
read : 1234567890123 (13 chars), taking last 10 of : 1234567890123
phone : (456) 789-0123
read : 123456789012 (12 chars), taking last 10 of : 123456789012
phone : (345) 678-9012
read : 12345678901 (11 chars), taking last 10 of : 12345678901
phone : (234) 567-8901
read : 1234567890 (10 chars), taking last 10 of : 1234567890
phone : (123) 456-7890
read : 123456789 (9 chars), taking last 10 of : 123456789
fmt_phone() error: insufficient digits in string.
phone :
Note: there are many, many different ways to approach this problem, this is but one.
Note2: while not required for this code, I included a function showing how to strip the trailing newline ('\n') from the input read by fgets. It is never a good idea to leave newlines dangling from strings in your code. While here they would not have caused a problem, in most cases they will bite you if you are not aware of them. So get in the practice of handling/removing the trailing newlines when using fgets or getline to read from a file. (note: getline provides the number of characters actually read as its return, so you can avoid calling strlen and simply use the return of getline to remove the newline in that case.)

read rows of ints from a file in C

how can I read rows of ints from a txt file in C.
input.txt
3
5 2 3
1
2 1 3 4
the first 3 means there are 3 lines in all.
I write a c++ version using cin cout sstream
but I wonder how can I make it in C using fscanf or other c functions.
I need to handle each line of integers separately.
I know I can use getline to read the whole line into a buffer, then parse the string buffer to get the integers.
Can I just use fscanf?
What you ultimately want to do is free yourself from having to worry about the format of your inputfile. You want a routine that is flexible enough to read each row and parse the integers in each row and allocate memory accordingly. That greatly improves the flexibility of your routine and minimizes the amount of recoding required.
As you can tell from the comments there are many, many, many valid ways to approach this problem. The following is a quick hack at reading all integers in a file into an array, printing the array, and then cleaning up and freeing the memory allocated during the program. (note: the checks for reallocating are shown in comments, but omitted for brevity).
Note too that the storage for the array is allocated with calloc, which allocates the memory and sets it to 0. This frees you from the requirement of keeping a persistent row and column count: you can simply iterate over the values in the array and stop when you encounter a value that was never set (it is still 0). The trade-off is that a literal 0 in the input cannot be told apart from the end of a row, so this shortcut only suits data that contains no zeros. Take a look over the code and let me know if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#define MROWS 100
#define MCOLS 20
/*
 * Read every line of the file named by argv[1], parse the integers on each
 * line into a row of a 2D array, print the array, then free everything.
 *
 * Rows and columns are terminated by a zero sentinel (NULL row pointer, 0
 * value), so at most MROWS - 1 rows and MCOLS - 1 values per row are stored;
 * input beyond those limits is ignored.  Because 0 is the sentinel, a literal
 * 0 in the input ends the row it appears in.
 *
 * Returns 0 on success, 1 on a usage, open or allocation error.
 */
int main (int argc, char **argv) {

    if (argc < 2) {
        fprintf (stderr, "error: insufficient input. usage: %s filename\n", argv[0]);
        return 1;
    }

    FILE *fp = fopen (argv[1], "r");
    if (!fp) {
        fprintf (stderr, "error: file open failed for '%s'.\n", argv[1]);
        return 1;
    }

    char *line = NULL;      /* NULL forces getline to allocate */
    size_t n = 0;           /* current size of the getline buffer */
    ssize_t nchr = 0;       /* number of chars actually read */
    int **array = NULL;     /* array of ptrs to array of int */
    size_t ridx = 0;        /* row index value */
    size_t cidx = 0;        /* col index value */
    char *endptr = NULL;    /* endptr to use with strtol */
    int status = 0;         /* exit status; 1 after an allocation failure */

    /* allocate MROWS (100) pointers to array of int */
    if (!(array = calloc (MROWS, sizeof *array))) {
        fprintf (stderr, "error: array allocation failed\n");
        fclose (fp);
        return 1;
    }

    /* read each line in file; the last pointer slot stays NULL so the
       sentinel-terminated loops below always find an end */
    while (ridx < MROWS - 1 && (nchr = getline (&line, &n, fp)) != -1)
    {
        /* strip newline or carriage return; check nchr first so a line
           made only of line endings never indexes line[-1] */
        while (nchr > 0 && (line[nchr-1] == '\r' || line[nchr-1] == '\n'))
            line[--nchr] = 0;

        if (!nchr)      /* if line is blank, skip */
            continue;

        /* allocate MCOLS (20) ints for array[ridx] */
        if (!(array[ridx] = calloc (MCOLS, sizeof **array))) {
            fprintf (stderr, "error: array[%zu] allocation failed\n", ridx);
            status = 1;
            break;
        }

        cidx = 0;           /* reset cidx */
        char *p = line;     /* assign pointer to line */

        /* parse each int in line into array, keeping the final slot 0 */
        while (cidx < MCOLS - 1)
        {
            /* checks for underflow/overflow omitted */
            int val = (int)strtol (p, &endptr, 10);

            if (p == endptr || val == 0)    /* no conversion, or sentinel */
                break;

            array[ridx][cidx++] = val;
            p = endptr;                     /* continue after this number */
        }

        ridx++;             /* increment ridx */
    }

    /* free line buffer and close input file */
    free (line);
    fclose (fp);

    if (status == 0) {
        printf ("\nArray:\n\n number of rows with data: %zu\n\n", ridx);

        /* output array values */
        for (ridx = 0; array[ridx]; ridx++)
        {
            for (cidx = 0; array[ridx][cidx]; cidx++)
                printf (" array[%zu][%zu] = %d\n", ridx, cidx, array[ridx][cidx]);

            printf ("\n");
        }
    }

    /* free allocated memory */
    for (ridx = 0; array[ridx]; ridx++)
        free (array[ridx]);
    free (array);

    return status;
}
input file
$ cat dat/intfile.txt
3
5 2 3
1
2 1 3 4
program output
$ ./bin/readintfile dat/intfile.txt
Array:
number of rows with data: 4
array[0][0] = 3
array[1][0] = 5
array[1][1] = 2
array[1][2] = 3
array[2][0] = 1
array[3][0] = 2
array[3][1] = 1
array[3][2] = 3
array[3][3] = 4
In C (not C++) you should combine fgets with sscanf function.
EDIT:
But as an answer for the question "Can I just use fscanf?"
try this example (where usage of fgetc allows using fscanf instead of fgets+sscanf):
/* Fragment: reads a line count, then the integers that follow, counting
   finished lines by looking at the character after each number.
   NOTE(review): f, res and num are used below but not declared in this
   fragment; presumably a FILE * and two ints declared by the caller. */
int lnNum = 0;
int lnCnt = 0; // line counter
int ch; // single character
// read number of lines
// (lnNum starts at 0, so a failed conversion is also caught by the check below)
fscanf(f, "%d", &lnNum);
if(lnNum < 1)
{
return 1; // wrong line number
}
// reading numbers line by line
// (the newline after the count is skipped as leading white space by the first %d)
do{
res = fscanf(f, "%d", &num);
// analyse res and process num
// ....
// check the next character
// (exactly one character is consumed after each number; only '\n' ends a line)
ch = fgetc(f);
if(ch == '\n')
{
lnCnt++; // one more line is finished
}
// stop once the announced number of lines has been seen, or at end of file
} while (lnNum > lnCnt && !feof(f) );
NOTE: This code works when the file contains only numbers separated by spaces or by single '\n' characters. If the file contains letters, or a space between a number and the newline (e.g. "number \n"), the line counting becomes unreliable.

Resources