I have a file like this:
...
words 13
more words 21
even more words 4
...
(General format is a string of non-digits, then a space, then any number of digits and a newline)
and I'd like to parse every line, putting the words into one field of the structure, and the number into the other. Right now I am using an ugly hack of reading the line while the chars are not numbers, then reading the rest. I believe there's a clearer way.
Edit: You can use pNum-buf to get the length of the alphabetical part of the string, and use strncpy() to copy that into another buffer. Be sure to add a '\0' to the end of the destination buffer. I would insert this code before the pNum++.
int len = pNum-buf;
strncpy(newBuf, buf, len-1);
newBuf[len] = '\0';
You could read the entire line into a buffer and then use:
char *pNum;
if (pNum = strrchr(buf, ' ')) {
pNum++;
}
to get a pointer to the number field.
fscanf(file, "%s %d", word, &value);
This gets the values directly into a string and an integer, and copes with variations in whitespace and numerical formats, etc.
Edit
Ooops, I forgot that you had spaces between the words.
In that case, I'd do the following. (Note that it truncates the original text in 'line')
// Scan to find the last space in the line
char *p = line;
char *lastSpace = null;
while(*p != '\0')
{
if (*p == ' ')
lastSpace = p;
p++;
}
if (lastSpace == null)
return("parse error");
// Replace the last space in the line with a NUL
*lastSpace = '\0';
// Advance past the NUL to the first character of the number field
lastSpace++;
char *word = text;
int number = atoi(lastSpace);
You can solve this using stdlib functions, but the above is likely to be more efficient as you're only searching for the characters you are interested in.
Given the description, I think I'd use a variant of this (now tested) C99 code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
struct word_number
{
char word[128];
long number;
};
int read_word_number(FILE *fp, struct word_number *wnp)
{
char buffer[140];
if (fgets(buffer, sizeof(buffer), fp) == 0)
return EOF;
size_t len = strlen(buffer);
if (buffer[len-1] != '\n') // Error if line too long to fit
return EOF;
buffer[--len] = '\0';
char *num = &buffer[len-1];
while (num > buffer && !isspace((unsigned char)*num))
num--;
if (num == buffer) // No space in input data
return EOF;
char *end;
wnp->number = strtol(num+1, &end, 0);
if (*end != '\0') // Invalid number as last word on line
return EOF;
*num = '\0';
if (num - buffer >= sizeof(wnp->word)) // Non-number part too long
return EOF;
memcpy(wnp->word, buffer, num - buffer);
return(0);
}
int main(void)
{
struct word_number wn;
while (read_word_number(stdin, &wn) != EOF)
printf("Word <<%s>> Number %ld\n", wn.word, wn.number);
return(0);
}
You could improve the error reporting by returning different values for different problems.
You could make it work with dynamically allocated memory for the word portion of the lines.
You could make it work with longer lines than I allow.
You could scan backwards over digits instead of non-spaces - but this allows the user to write "abc 0x123" and the hex value is handled correctly.
You might prefer to ensure there are no digits in the word part; this code does not care.
You could try using strtok() to tokenize each line, and then check whether each token is a number or a word (a fairly trivial check once you have the token string - just look at the first character of the token).
Assuming that the number is immediately followed by '\n'.
you can read each line to chars buffer, use sscanf("%d") on the entire line to get the number, and then calculate the number of chars that this number takes at the end of the text string.
Depending on how complex your strings become you may want to use the PCRE library. At least that way you can compile a perl'ish regular expression to split your lines. It may be overkill though.
Given the description, here's what I'd do: read each line as a single string using fgets() (making sure the target buffer is large enough), then split the line using strtok(). To determine if each token is a word or a number, I'd use strtol() to attempt the conversion and check the error condition. Example:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/**
* Read the next line from the file, splitting the tokens into
* multiple strings and a single integer. Assumes input lines
* never exceed MAX_LINE_LENGTH and each individual string never
* exceeds MAX_STR_SIZE. Otherwise things get a little more
* interesting. Also assumes that the integer is the last
* thing on each line.
*/
int getNextLine(FILE *in, char (*strs)[MAX_STR_SIZE], int *numStrings, int *value)
{
char buffer[MAX_LINE_LENGTH];
int rval = 1;
if (fgets(buffer, buffer, sizeof buffer))
{
char *token = strtok(buffer, " ");
*numStrings = 0;
while (token)
{
char *chk;
*value = (int) strtol(token, &chk, 10);
if (*chk != 0 && *chk != '\n')
{
strcpy(strs[(*numStrings)++], token);
}
token = strtok(NULL, " ");
}
}
else
{
/**
* fgets() hit either EOF or error; either way return 0
*/
rval = 0;
}
return rval;
}
/**
* sample main
*/
int main(void)
{
FILE *input;
char strings[MAX_NUM_STRINGS][MAX_STRING_LENGTH];
int numStrings;
int value;
input = fopen("datafile.txt", "r");
if (input)
{
while (getNextLine(input, &strings, &numStrings, &value))
{
/**
* Do something with strings and value here
*/
}
fclose(input);
}
return 0;
}
Related
I wanted to only count the number of strings in a text file, containing numbers as well. But the code below, counts even the numbers in the file as strings. How do I rectify the problem?
int count;
char *temp;
FILE *fp;
fp = fopen("multiplexyz.txt" ,"r" );
while(fscanf(fp,"%s",temp) != EOF )
{
count++;
}
printf("%d ",count);
return 0;
}
Well, first up, using the temp pointer without having backing storage for it is going to cause you a world of pain.
I'd suggest, as a start, using something like char temp[1000] instead, keeping in mind that's still a bit risky if you have words more than a thousand or so characters long (that's a different issue to the one you're asking about so I'll mention it but not spend too much time on fixing it).
Secondly, it appears you want to count words with numbers (like alpha7 or pi/2). If that's the case, you simply need to check temp after reading the "word" and increment count only if it matches a "non-numeric" pattern.
That could be as simple as just not incrementing if the word consists only of digits, or it could be complicated if you want to handle decimals, exponential formats and so on.
But the bottom line remains the same:
while(fscanf(fp,"%s",temp) != EOF )
{
if (! isANumber(temp))
count++;
}
with a suitable definition of isANumber. For example, for unsigned integers only, something like this would be a good start:
int isANumber (char *str) {
// Empty string is not a number.
if (*str == '\0')
return 0;
// Check every character.
while (*str != '\0') {
// If non-digit, it's not a number.
if (! isdigit (*str))
return 0;
str++;
}
// If all characters were digits, it was a number.
return 1;
}
For more complex checking, you can use the strto* calls in C, giving them the temp buffer and ensuring you use the endptr method to ensure the entire string is scanned. Off the top of my head, so not well tested, that would go something like:
int isANumber (char *str) {
// Empty string is not a number.
if (*str == '\0')
return 0;
// Use strtod to get a double.
char *endPtr;
long double d = strtold (str, &endPtr);
// Characters unconsumed, not number (things like 42b).
if (*endPtr != '\0')
return 0;
// Was a long double, so number.
return 1;
}
The only thing you need to watch out for there is that certain strings like NaN or +Inf are considered a number by strtold so you may need extra checks for that.
inside your while loop, loop through the string to check if any of its characters are digits. Something like:
while(*temp != '\0'){
if(isnumber(*temp))
break;
}
[dont copy exact same code]
I find strpbrk to be one of the most helpful function to search for several needles in a haystack. Your set of needles being the numeric characters "0123456789" which if present in a line read from your file will count as a line. I also prefer POSIX getline for a line count do to its proper handling of files with non-POSIX line endings for the last line (both fgets and wc -l omit text (and a count) of the last line if it does not contain a POSIX line end ('\n'). That said, a small function that searches a line for characters contained in a trm passed as a parameter could be written as:
/** open and read each line in 'fn' returning the number of lines
* continaing any of the characters in 'trm'.
*/
size_t nlines (char *fn, char *trm)
{
if (!fn) return 0;
size_t lines = 0, n = 0;
char *buf = NULL;
FILE *fp = fopen (fn, "r");
if (!fp) return 0;
while (getline (&buf, &n, fp) != -1)
if (strpbrk (buf, trm))
lines++;
fclose (fp);
free (buf);
return lines;
}
Simply pass the filename of interest and the terms to search for in each line. A short test code with a default term of "0123456789" that takes the filename as the first parameter and the term as the second could be written as follows:
#include <stdio.h> /* printf */
#include <stdlib.h> /* free */
#include <string.h> /* strlen, strrchr */
size_t nlines (char *fn, char *trm);
int main (int argc, char **argv) {
char *fn = argc > 1 ? argv[1] : NULL;
char *srch = argc > 2 ? argv[2] : "0123456789";
if (!fn) return 1;
printf ("%zu %s\n", nlines (fn, srch), fn);
return 0;
}
/** open and read each line in 'fn' returning the number of lines
* continaing any of the characters in 'trm'.
*/
size_t nlines (char *fn, char *trm)
{
if (!fn) return 0;
size_t lines = 0, n = 0;
char *buf = NULL;
FILE *fp = fopen (fn, "r");
if (!fp) return 0;
while (getline (&buf, &n, fp) != -1)
if (strpbrk (buf, trm))
lines++;
fclose (fp);
free (buf);
return lines;
}
Give it a try and see if this is what you are expecting, if not, just let me know and I am glad to help further.
Example Input File
$ cat dat/linewno.txt
The quick brown fox
jumps over 3 lazy dogs
who sleep in the sun
with a temp of 101
Example Use/Output
$ ./bin/getline_nlines_nums dat/linewno.txt
2 dat/linewno.txt
$ wc -l dat/linewno.txt
4 dat/linewno.txt
I have two files .csv and I need to read the whole file but it have to be filed by field. I mean, csv files are files with data separated by comma, so I cant use fgets.
I need to read all the data but I don't know how to jump to the next line.
Here is what I've done so far:
int main()
{
FILE *arq_file;
arq_file = fopen("file.csv", "r");
if(arq_file == NULL){
printf("Not possible to read the file.");
exit(0);
}
while( !feof(arq_file) ){
fscanf(arq_file, "%i %lf", &myStruct[i+1].Field1, &myStruct[i+1].Field2);
}
fclose(arq_file);
return 0;
}
It will get in a infinity loop because it never gets the next line.
How could I reach the line below the one I just read?
Update: File 01 Example
1,Alan,123,
2,Alan Harper,321
3,Jose Rendeks,32132
4,Maria da graça,822282
5,Charlie Harper,9999999999
File 02 Example
1,320,123
2,444,321
3,250,123,321
3,3,250,373,451
2,126,621
1,120,320
2,453,1230
3,12345,0432,1830
I think an example is better than giving you hints, this is a combination of fgets() + strtok(), there are other functions that could work for example strchr(), though it's easier this way and since I just wanted to point you in the right direction, well I did it like this
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int
main(void)
{
FILE *file;
char buffer[256];
char *pointer;
size_t line;
file = fopen("data.dat", "r");
if (file == NULL)
{
perror("fopen()");
return -1;
}
line = 0;
while ((pointer = fgets(buffer, sizeof(buffer), file)) != NULL)
{
size_t field;
char *token;
field = 0;
while ((token = strtok(pointer, ",")) != NULL)
{
printf("line %zu, field %zu -> %s\n", line, field, token);
field += 1;
pointer = NULL;
}
line += 1;
}
return 0;
}
I think it's very clear how the code works and I hope you can understand.
If the same code has to handle both data files, then you're stuck with reading the fields into a string, and subsequently converting the string into a number.
It is not clear from your description whether you need to do something special at the end of line or not — but because only one of the data lines ends with a comma, you do have to allow for fields to be separated by a comma or a newline.
Frankly, you'd probably do OK with using getchar() or equivalent; it is simple.
char buffer[4096];
char *bufend = buffer + sizeof(buffer) - 1;
char *curfld = buffer;
int c;
while ((c = getc(arq_file)) != EOF)
{
if (curfld == bufend)
…process overlong field…
else if (c == ',' || c == '\n')
{
*curfld = '\0';
process(buffer);
curfld = buffer;
}
else
*curfld++ = c;
}
if (c == EOF && curfld != buffer)
{
*curfld = '\0';
process(buffer);
}
However, if you want to go with higher level functions, then you do want to use fgets() to read lines (unless you need to worry about deviant line endings, such as DOS vs Unix vs old-style Mac (CR-only) line endings). Or use POSIX
getline() to read arbitrarily long lines. Then split the lines using strtok_r() or equivalent.
char *buffer = 0;
size_t buflen = 0;
while (getline(&buffer, &buflen, arq_file) != -1)
{
char *posn = buffer;
char *epos;
char *token;
while ((token = strtok_r(posn, ",\n", &epos)) != 0)
{
process(token);
posn = 0;
}
/* Do anything special for end of line */
}
free(buffer);
If you think you must use scanf(), then you need to use something like:
char buffer[4096];
char c;
while (fscanf(arq_file, "%4095[^,\n]%c", buffer, &c) == 2)
process(buffer);
The %4095[^,\n] scan set reads up to 4095 characters that are neither comma nor newline into buffer, and then reads the next character (which must, therefore, either be comma or newline — or conceivably EOF, but that causes problems) into c. If the last character in the file is neither comma nor newline, then you will skip the last field.
I'm trying to parse in a text file, and add each distinct word into a hashtable, with the words as keys, and their frequencies as values. The problem is proving to be the reading part: the file is a very large file of "normal" text, in that it has punctuation and special characters. I want to treat all non-alphabetical chars read in as word-boundaries. I have something basic going with this:
char buffer[128];
while(fscanf(fp, "%127[A-Za-z]%*c", buffer) == 1) {
printf("%s\n", buffer);
memset(buffer, 0, 128);
}
However, that chokes whenever it actually hits a non-alphabetical char preceded by whitespace (e.g., "the,cat was (brown)" would be read in as "the cat was"). I know what the issue is with that code, but I'm not sure how to get around it. Would I be better off just reading in an entire line and doing the parsing manually? I'm trying scanf because I felt that this was a pretty good candidate for the mini-regex thing that you can do with the format string.
Suggest use of isalpha(), fgetc() and a simple state-machine.
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
int AdamRead(FILE *inf, char *dest, size_t n) {
int ch;
do {
ch = fgetc(inf);
if (ch == EOF) return EOF;
} while (!isalpha(ch));
assert(n > 1);
n--; // save room for \0
while (n-- > 0) {
*dest++ = ch;
ch = fgetc(inf);
if (!isalpha(ch)) break;
}
ungetc(ch, inf); // Add this is something else may need to parse `inf`.
*dest = '\0';
return 1;
}
char buffer[128];
while(AdamRead(fp, buffer, sizeof buffer) == 1) {
printf("%s\n", buffer);
}
Note: If you want to go the "%127[A-Za-z]%*[^A-Za-z]" route, code may need to start with a one-time fscanf(fp, "*[^A-Za-z]"); to deal with leading non-letters.
There's another way apart from the one mentioned in the comment. I don't know if it's better though. You can read lines from the file using fgets and then tokenize the line using strtok_r POSIX function. Here, r means the function is reentrant which makes it thread-safe. However, you must know the maximum length a line can have in the file.
#include <stdio.h>
#include <string.h>
#define MAX_LEN 100
// in main
char line[MAX_LEN];
char *token;
const char *delim = "!##$%^&*"; // all special characters
char *saveptr; // for strtok_r
FILE *fp = fopen("myfile.txt", "r");
while(fgets(line, MAX_LEN, fp) != NULL) {
for(; ; line = NULL) {
token = strtok_r(line, delim, &saveptr);
if(token == NULL)
break;
else {
// token is a string.
// process it
}
}
}
fclose(fp);
strtok_r modifies its first argument line, so you should keep a copy of it if it needed for other purposes.
I have an input file I need to extract words from. The words can only contain letters and numbers so anything else will be treated as a delimiter. I tried fscanf,fgets+sscanf and strtok but nothing seems to work.
while(!feof(file))
{
fscanf(file,"%s",string);
printf("%s\n",string);
}
Above one clearly doesn't work because it doesn't use any delimiters so I replaced the line with this:
fscanf(file,"%[A-z]",string);
It reads the first word fine but the file pointer keeps rewinding so it reads the first word over and over.
So I used fgets to read the first line and use sscanf:
sscanf(line,"%[A-z]%n,word,len);
line+=len;
This one doesn't work either because whatever I try I can't move the pointer to the right place. I tried strtok but I can't find how to set delimitters
while(p != NULL) {
printf("%s\n", p);
p = strtok(NULL, " ");
This one obviously take blank character as a delimitter but I have literally 100s of delimitters.
Am I missing something here becasue extracting words from a file seemed a simple concept at first but nothing I try really works?
Consider building a minimal lexer. When in state word it would remain in it as long as it sees letters and numbers. It would switch to state delimiter when encountering something else. Then it could do an exact opposite in the state delimiter.
Here's an example of a simple state machine which might be helpful. For the sake of brevity it works only with digits. echo "2341,452(42 555" | ./main will print each number in a separate line. It's not a lexer but the idea of switching between states is quite similar.
#include <stdio.h>
#include <string.h>
int main() {
static const int WORD = 1, DELIM = 2, BUFLEN = 1024;
int state = WORD, ptr = 0;
char buffer[BUFLEN], *digits = "1234567890";
while ((c = getchar()) != EOF) {
if (strchr(digits, c)) {
if (WORD == state) {
buffer[ptr++] = c;
} else {
buffer[0] = c;
ptr = 1;
}
state = WORD;
} else {
if (WORD == state) {
buffer[ptr] = '\0';
printf("%s\n", buffer);
}
state = DELIM;
}
}
return 0;
}
If the number of states increases you can consider replacing if statements checking the current state with switch blocks. The performance can be increased by replacing getchar with reading a whole block of the input to a temporary buffer and iterating through it.
In case of having to deal with a more complex input file format you can use lexical analysers generators such as flex. They can do the job of defining state transitions and other parts of lexer generation for you.
Several points:
First of all, do not use feof(file) as your loop condition; feof won't return true until after you attempt to read past the end of the file, so your loop will execute once too often.
Second, you mentioned this:
fscanf(file,"%[A-z]",string);
It reads the first word fine but the file pointer keeps rewinding so it reads the first word over and over.
That's not quite what's happening; if the next character in the stream doesn't match the format specifier, scanf returns without having read anything, and string is unmodified.
Here's a simple, if inelegant, method: it reads one character at a time from the input file, checks to see if it's either an alpha or a digit, and if it is, adds it to a string.
#include <stdio.h>
#include <ctype.h>
int get_next_word(FILE *file, char *word, size_t wordSize)
{
size_t i = 0;
int c;
/**
* Skip over any non-alphanumeric characters
*/
while ((c = fgetc(file)) != EOF && !isalnum(c))
; // empty loop
if (c != EOF)
word[i++] = c;
/**
* Read up to the next non-alphanumeric character and
* store it to word
*/
while ((c = fgetc(file)) != EOF && i < (wordSize - 1) && isalnum(c))
{
word[i++] = c;
}
word[i] = 0;
return c != EOF;
}
int main(void)
{
char word[SIZE]; // where SIZE is large enough to handle expected inputs
FILE *file;
...
while (get_next_word(file, word, sizeof word))
// do something with word
...
}
I would use:
FILE *file;
char string[200];
while(fscanf(file, "%*[^A-Za-z]"), fscanf(file, "%199[a-zA-Z]", string) > 0) {
/* do something with string... */
}
This skips over non-letters and then reads a string of up to 199 letters. The only oddness is that if you have any 'words' that are longer than 199 letters they'll be split up into multiple words, but you need the limit to avoid a buffer overflow...
What are your delimiters? The second argument to strtok should be a string containing your delimiters, and the first should be a pointer to your string the first time round then NULL afterwards:
char * p = strtok(line, ","); // assuming a , delimiter
printf("%s\n", p);
while(p)
{
p = strtok(NULL, ",");
printf("%S\n", p);
}
What is the simplest way to read a full line in a C console program
The text entered might have a variable length and we can't make any assumption about its content.
You need dynamic memory management, and use the fgets function to read your line. However, there seems to be no way to see how many characters it read. So you use fgetc:
char * getline(void) {
char * line = malloc(100), * linep = line;
size_t lenmax = 100, len = lenmax;
int c;
if(line == NULL)
return NULL;
for(;;) {
c = fgetc(stdin);
if(c == EOF)
break;
if(--len == 0) {
len = lenmax;
char * linen = realloc(linep, lenmax *= 2);
if(linen == NULL) {
free(linep);
return NULL;
}
line = linen + (line - linep);
linep = linen;
}
if((*line++ = c) == '\n')
break;
}
*line = '\0';
return linep;
}
Note: Never use gets ! It does not do bounds checking and can overflow your buffer
If you are using the GNU C library or another POSIX-compliant library, you can use getline() and pass stdin to it for the file stream.
A very simple but unsafe implementation to read line for static allocation:
char line[1024];
scanf("%[^\n]", line);
A safer implementation, without the possibility of buffer overflow, but with the possibility of not reading the whole line, is:
char line[1024];
scanf("%1023[^\n]", line);
Not the 'difference by one' between the length specified declaring the variable and the length specified in the format string. It is a historical artefact.
So, if you were looking for command arguments, take a look at Tim's answer.
If you just want to read a line from console:
#include <stdio.h>
int main()
{
char string [256];
printf ("Insert your full address: ");
gets (string);
printf ("Your address is: %s\n",string);
return 0;
}
Yes, it is not secure, you can do buffer overrun, it does not check for end of file, it does not support encodings and a lot of other stuff.
Actually I didn't even think whether it did ANY of this stuff.
I agree I kinda screwed up :)
But...when I see a question like "How to read a line from the console in C?", I assume a person needs something simple, like gets() and not 100 lines of code like above.
Actually, I think, if you try to write those 100 lines of code in reality, you would do many more mistakes, than you would have done had you chosen gets ;)
getline runnable example
getline was mentioned on this answer but here is an example.
It is POSIX 7, allocates memory for us, and reuses the allocated buffer on a loop nicely.
Pointer newbs, read this: Why is the first argument of getline a pointer to pointer "char**" instead of "char*"?
main.c
#define _XOPEN_SOURCE 700
#include <stdio.h>
#include <stdlib.h>
int main(void) {
char *line = NULL;
size_t len = 0;
ssize_t read = 0;
while (1) {
puts("enter a line");
read = getline(&line, &len, stdin);
if (read == -1)
break;
printf("line = %s", line);
printf("line length = %zu\n", read);
puts("");
}
free(line);
return 0;
}
Compile and run:
gcc -ggdb3 -O0 -std=c99 -Wall -Wextra -pedantic -o main.out main.c
./main.out
Outcome: this shows on therminal:
enter a line
Then if you type:
asdf
and press enter, this shows up:
line = asdf
line length = 5
followed by another:
enter a line
Or from a pipe to stdin:
printf 'asdf\nqwer\n' | ./main.out
gives:
enter a line
line = asdf
line length = 5
enter a line
line = qwer
line length = 5
enter a line
Tested on Ubuntu 20.04.
glibc implementation
No POSIX? Maybe you want to look at the glibc 2.23 implementation.
It resolves to getdelim, which is a simple POSIX superset of getline with an arbitrary line terminator.
It doubles the allocated memory whenever increase is needed, and looks thread-safe.
It requires some macro expansion, but you're unlikely to do much better.
You might need to use a character by character (getc()) loop to ensure you have no buffer overflows and don't truncate the input.
As suggested, you can use getchar() to read from the console until an end-of-line or an EOF is returned, building your own buffer. Growing buffer dynamically can occur if you are unable to set a reasonable maximum line size.
You can use also use fgets as a safe way to obtain a line as a C null-terminated string:
#include <stdio.h>
char line[1024]; /* Generously large value for most situations */
char *eof;
line[0] = '\0'; /* Ensure empty line if no input delivered */
line[sizeof(line)-1] = ~'\0'; /* Ensure no false-null at end of buffer */
eof = fgets(line, sizeof(line), stdin);
If you have exhausted the console input or if the operation failed for some reason, eof == NULL is returned and the line buffer might be unchanged (which is why setting the first char to '\0' is handy).
fgets will not overfill line[] and it will ensure that there is a null after the last-accepted character on a successful return.
If end-of-line was reached, the character preceding the terminating '\0' will be a '\n'.
If there is no terminating '\n' before the ending '\0' it may be that there is more data or that the next request will report end-of-file. You'll have to do another fgets to determine which is which. (In this regard, looping with getchar() is easier.)
In the (updated) example code above, if line[sizeof(line)-1] == '\0' after successful fgets, you know that the buffer was filled completely. If that position is proceeded by a '\n' you know you were lucky. Otherwise, there is either more data or an end-of-file up ahead in stdin. (When the buffer is not filled completely, you could still be at an end-of-file and there also might not be a '\n' at the end of the current line. Since you have to scan the string to find and/or eliminate any '\n' before the end of the string (the first '\0' in the buffer), I am inclined to prefer using getchar() in the first place.)
Do what you need to do to deal with there still being more line than the amount you read as the first chunk. The examples of dynamically-growing a buffer can be made to work with either getchar or fgets. There are some tricky edge cases to watch out for (like remembering to have the next input start storing at the position of the '\0' that ended the previous input before the buffer was extended).
How to read a line from the console in C?
Building your own function, is one of the ways that would help you to achieve reading a line from console
I'm using dynamic memory allocation to allocate the required amount of memory required
When we are about to exhaust the allocated memory, we try to double the size of memory
And here I'm using a loop to scan each character of the string one by one using the getchar() function until the user enters '\n' or EOF character
finally we remove any additionally allocated memory before returning the line
//the function to read lines of variable length
char* scan_line(char *line)
{
int ch; // as getchar() returns `int`
long capacity = 0; // capacity of the buffer
long length = 0; // maintains the length of the string
char *temp = NULL; // use additional pointer to perform allocations in order to avoid memory leaks
while ( ((ch = getchar()) != '\n') && (ch != EOF) )
{
if((length + 1) >= capacity)
{
// resetting capacity
if (capacity == 0)
capacity = 2; // some initial fixed length
else
capacity *= 2; // double the size
// try reallocating the memory
if( (temp = realloc(line, capacity * sizeof(char))) == NULL ) //allocating memory
{
printf("ERROR: unsuccessful allocation");
// return line; or you can exit
exit(1);
}
line = temp;
}
line[length] = (char) ch; //type casting `int` to `char`
length++;
}
line[length + 1] = '\0'; //inserting null character at the end
// remove additionally allocated memory
if( (temp = realloc(line, (length + 1) * sizeof(char))) == NULL )
{
printf("ERROR: unsuccessful allocation");
// return line; or you can exit
exit(1);
}
line = temp;
return line;
}
Now you could read a full line this way :
char *line = NULL;
line = scan_line(line);
Here's an example program using the scan_line() function :
#include <stdio.h>
#include <stdlib.h> //for dynamic allocation functions
char* scan_line(char *line)
{
..........
}
int main(void)
{
char *a = NULL;
a = scan_line(a); //function call to scan the line
printf("%s\n",a); //printing the scanned line
free(a); //don't forget to free the malloc'd pointer
}
sample input :
Twinkle Twinkle little star.... in the sky!
sample output :
Twinkle Twinkle little star.... in the sky!
I came across the same problem some time ago, this was my solutuion, hope it helps.
/*
* Initial size of the read buffer
*/
#define DEFAULT_BUFFER 1024
/*
* Standard boolean type definition
*/
typedef enum{ false = 0, true = 1 }bool;
/*
* Flags errors in pointer returning functions
*/
bool has_err = false;
/*
* Reads the next line of text from file and returns it.
* The line must be free()d afterwards.
*
* This function will segfault on binary data.
*/
char *readLine(FILE *file){
char *buffer = NULL;
char *tmp_buf = NULL;
bool line_read = false;
int iteration = 0;
int offset = 0;
if(file == NULL){
fprintf(stderr, "readLine: NULL file pointer passed!\n");
has_err = true;
return NULL;
}
while(!line_read){
if((tmp_buf = malloc(DEFAULT_BUFFER)) == NULL){
fprintf(stderr, "readLine: Unable to allocate temporary buffer!\n");
if(buffer != NULL)
free(buffer);
has_err = true;
return NULL;
}
if(fgets(tmp_buf, DEFAULT_BUFFER, file) == NULL){
free(tmp_buf);
break;
}
if(tmp_buf[strlen(tmp_buf) - 1] == '\n') /* we have an end of line */
line_read = true;
offset = DEFAULT_BUFFER * (iteration + 1);
if((buffer = realloc(buffer, offset)) == NULL){
fprintf(stderr, "readLine: Unable to reallocate buffer!\n");
free(tmp_buf);
has_err = true;
return NULL;
}
offset = DEFAULT_BUFFER * iteration - iteration;
if(memcpy(buffer + offset, tmp_buf, DEFAULT_BUFFER) == NULL){
fprintf(stderr, "readLine: Cannot copy to buffer\n");
free(tmp_buf);
if(buffer != NULL)
free(buffer);
has_err = true;
return NULL;
}
free(tmp_buf);
iteration++;
}
return buffer;
}
There is a simple regex like syntax that can be used inside scanf to take whole line as input
scanf("%[^\n]%*c", str);
^\n tells to take input until newline doesn't get encountered. Then, with %*c, it reads newline character and here used * indicates that this newline character is discarded.
Sample code
#include <stdio.h>
int main()
{
char S[101];
scanf("%[^\n]%*c", S);
printf("%s", S);
return 0;
}
On BSD systems and Android you can also use fgetln:
#include <stdio.h>
char *
fgetln(FILE *stream, size_t *len);
Like so:
size_t line_len;
const char *line = fgetln(stdin, &line_len);
The line is not null terminated and contains \n (or whatever your platform is using) in the end. It becomes invalid after the next I/O operation on stream.
Something like this:
unsigned int getConsoleInput(char **pStrBfr) //pass in pointer to char pointer, returns size of buffer
{
char * strbfr;
int c;
unsigned int i;
i = 0;
strbfr = (char*)malloc(sizeof(char));
if(strbfr==NULL) goto error;
while( (c = getchar()) != '\n' && c != EOF )
{
strbfr[i] = (char)c;
i++;
strbfr = (void*)realloc((void*)strbfr,sizeof(char)*(i+1));
//on realloc error, NULL is returned but original buffer is unchanged
//NOTE: the buffer WILL NOT be NULL terminated since last
//chracter came from console
if(strbfr==NULL) goto error;
}
strbfr[i] = '\0';
*pStrBfr = strbfr; //successfully returns pointer to NULL terminated buffer
return i + 1;
error:
*pStrBfr = strbfr;
return i + 1;
}
The best and simplest way to read a line from a console is using the getchar() function, whereby you will store one character at a time in an array.
{
char message[N]; /* character array for the message, you can always change the character length */
int i = 0; /* loop counter */
printf( "Enter a message: " );
message[i] = getchar(); /* get the first character */
while( message[i] != '\n' ){
message[++i] = getchar(); /* gets the next character */
}
printf( "Entered message is:" );
for( i = 0; i < N; i++ )
printf( "%c", message[i] );
return ( 0 );
}
Here is a minimal implementation to do it, the nice thing is that it will not keep the '\n', however you have to give it a size to read for security:
#include <stdio.h>
#include <errno.h>
int sc_gets(char *buf, int n)
{
int count = 0;
char c;
if (__glibc_unlikely(n <= 0))
return -1;
while (--n && (c = fgetc(stdin)) != '\n')
buf[count++] = c;
buf[count] = '\0';
return (count != 0 || errno != EAGAIN) ? count : -1;
}
Test with:
#define BUFF_SIZE 10
int main (void) {
char buff[BUFF_SIZE];
sc_gets(buff, sizeof(buff));
printf ("%s\n", buff);
return 0;
}
NB: You are limited to INT_MAX to find your line return, which is more than enough.