I am working my way through the exercises of the first chapter of The C Programming Language and while I understand most of what is said and shown, there is one example that I don't understand.
In 1.9, there is a function shown to return the length of a line while setting a char array, passed as an argument, to the contents.
/*
 * get_line: read characters from standard input until a newline or EOF.
 * At most lim - 1 characters are stored in s, followed by '\0'.
 * Returns the number of characters stored (the terminator is not counted).
 */
int get_line(char s[], int lim)
{
int c, i, l;
/* i counts every character read before the newline; l counts only the ones
   that fit in s, so an over-long line is consumed but stored truncated. */
for (i = 0, l = 0; (c = getchar()) != EOF && c != '\n'; ++i) {
if (i < lim - 1)
s[l++] = c;
}
/* The loop ends on EOF or '\n' without storing either one; the newline is
   appended here only when there is still room for it. */
if (c == '\n')
if (l < lim - 1)
s[l++] = c;
s[l] = '\0';
return l;
}
The thing I do not understand, is why we need this: if (c == '\n') {...}. Could this not be combined in the for-loop? Where we explicitly check that c is not equal to '\n'? I'm having trouble wrapping my head around why this needs to be an external condition.
Any light shed would be helpful!
Thanks!
The for loop is exited if either c equals EOF or c equals '\n'. Therefore, immediately after the for loop, if you want to know which value c has, you must test.
is why we need this: if (c == '\n') {...}.
get_line() is structurally:
get_line() {
initialize
while get, A not true and B not true
perform X
if B
perform X
finalize
The loop quits under 2 conditions. With one of those (c == '\n'), we still want to perform X somewhere as that is part of the function goal.
Could this not be combined in the for-loop?
It could be combined, yet then we have 2 locations that exit the loop.
Typical coding guidelines promote a single location to quit the loop. If we set aside that goal, then:
get_line() {
initialize
while get, A not true
perform X
if B quit the loop
finalize
The code below has the same number of condition checks, but 2 loop exit points.
/*
 * get_line: read one line from standard input into s.
 * Stores at most lim - 1 characters (the newline included when it fits),
 * appends '\0', and returns the number of characters stored.
 * The loop has two exits: EOF in the loop condition, newline via break.
 */
int get_line(char s[], int lim) {
    int ch;
    int seen = 0;    /* characters read before the newline */
    int stored = 0;  /* characters actually written to s */

    while ((ch = getchar()) != EOF) {
        if (seen < lim - 1) {
            s[stored] = ch;
            ++stored;
        }
        if (ch == '\n')
            break;
        ++seen;
    }
    s[stored] = '\0';
    return stored;
}
We could contort the code to get the 2 checks back on the same line and avoid that pesky if (c == '\n') after the loop. Stylistically this may be harder to follow.
/*
 * get_line: variant that keeps both termination tests in the loop condition.
 * Stores at most lim - 1 characters (including the newline when it fits),
 * appends '\0', and returns the number of characters stored.
 */
int get_line(char s[], int lim) {
int c, i, l;
/* c starts at 0 so the first `c != '\n'` test passes. On later passes that
   test looks at the character handled by the previous iteration, so a newline
   is stored by the body first and only then stops the loop, before another
   getchar() call is made. */
for (i = 0, l = 0, c = 0; c != '\n' && (c = getchar()) != EOF; ++i) {
if (i < lim - 1)
s[l++] = c;
}
s[l] = '\0';
return l;
}
Lastly, code could use improvements:
No need for i and l index counters. One is enough.
Array sizing and index best uses size_t type. Warning: size_t is some unsigned type.
Using a leading size parameter allows for better static code analysis and self-documenting code: the lim relates to s[].
Avoid math on input parameters to not incur overflow. We have more range control on local objects.
Careful when lim is at an extreme or zero.
Rather than assign after declaration, where practical, initialize. E.g. int i = 0;
get_line() {
initialize
while B not true, get, A not true
perform X
finalize
or
#include <stdio.h>
#include <stdlib.h>
/*
 * get_line: read one line from standard input into s, which has room for
 * `size` bytes. The whole line is consumed; at most size - 1 characters are
 * stored (the newline included when it fits). A '\0' is written only when
 * size is non-zero. Returns the number of characters stored.
 */
size_t get_line(size_t size, char s[size]) {
    size_t len = 0;
    int ch = 0;

    while (ch != '\n') {
        ch = getchar();
        if (ch == EOF)
            break;
        if (len + 1 < size) {
            s[len] = (char) ch;
            len++;
        }
    }
    /* size may be 0, in which case there is no room even for the terminator */
    if (len < size)
        s[len] = '\0';
    return len;
}
If you want to put it in the loop, you have to do something like that:
/*
 * get_line: read one line from standard input into s with the newline
 * handled inside the loop. Stores at most lim - 1 characters (the newline
 * included when it fits), appends '\0', and returns the count stored.
 */
int get_line(char s[], int lim)
{
    int ch;
    int seen = 0;    /* characters read before the newline */
    int stored = 0;  /* characters written to s */

    while ((ch = getchar()) != EOF) {
        if (ch == '\n') {
            /* the newline gets its own room check, then ends the line */
            if (stored < lim - 1)
                s[stored++] = ch;
            break;
        }
        if (seen < lim - 1)
            s[stored++] = ch;
        ++seen;
    }
    s[stored] = '\0';
    return stored;
}
So, as you see, wrapping the condition inside the loop leads to more condition checks and a break statement.
Related
I'm studying C using the "C Programming Language" book, and I got into character arrays (section 1.9) from chapter 1. This is the getline() function it presents:
/*
 * getline: read a line into s, return length.
 * Stores at most lim - 1 characters followed by '\0'. A newline that ends
 * the line is stored and counted in the returned length.
 */
int getline(char s[], int lim)
{
int c, i;
/* ++i runs after each pass through the body, so when the loop ends i is the
   index of the first unused slot in s. */
for (i = 0; i < lim-1 && (c = getchar()) != EOF && c != '\n'; ++i)
s[i] = c;
/* NOTE(review): when lim <= 1 the loop never calls getchar(), so c is tested
   here without ever having been assigned. */
if (c == '\n') {
s[i] = c;
++i;
}
s[i] = '\0';
return i;
}
Take the word "Hi" as example.
In my point of view, s[1] should be equal to '\n', because the i should be only incremented when all conditions are met, however, when c is equal to '\n', the for loop exits, and i should contain a value of 1 and not 2.
So, why it is being incremented with only 2 of 3 conditions met?
I already tried using pre-increment operators and post-increments operators, and testing the i value inside certain lines of code.
The loop variable i is incremented at the end of the loop body. You can write your for-loop:
for(i = 0; i < lim-1 && (c = getchar()) != EOF && c != '\n'; i++)
s[i] = c;
as an equivalent while-loop:
i = 0;
while(i < lim-1 && (c = getchar()) != EOF && c != '\n') {
s[i] = c;
i++;
}
Also, I suggest you rename your function so it does not conflict with the existing function of the same name. Consider swapping the arguments so you can express how the lim and s variables are related, and use size_t instead of int, as a negative lim probably doesn't make sense:
size_t my_getline(size_t lim, char s[lim]);
The loop condition to i < lim - 1 is wrong as may write 2 bytes after the loop which will overflow the buffer. If lim <= 0 you overflow the buffer. if lim < 1 you have undefined behavior as c is uninitialized. I suggest you handle the newline in the loop like this, and only write the '\0' if s has room for at least 1 byte:
/*
 * my_getline: read one line from standard input into s.
 *
 * lim  number of bytes available in s
 * s    destination buffer
 *
 * Stores at most lim - 1 characters; a newline that ends the line is stored.
 * Reading stops after the newline, at EOF, or when the buffer is full.
 * A terminating '\0' is written only when lim is non-zero.
 * Returns the number of characters stored (terminator not counted).
 */
size_t my_getline(size_t lim, char s[lim]) {
    size_t i = 0;

    /* Test i + 1 < lim rather than i < lim - 1: lim is unsigned, so with
       lim == 0 the subtraction would wrap to SIZE_MAX and the loop would
       write into a buffer that has no room at all. */
    while (i + 1 < lim) {
        int c = getchar();
        if (c == EOF)
            break;
        s[i++] = (char)c;
        if (c == '\n')
            break;
    }
    if (lim)
        s[i] = '\0';
    return i;
}
I write a program to solve Exercise 2-2 of K&R.
#include<stdio.h>
#define MAXLINE 1000
/* write an equivalent loop without using && or || */
int Getline(char s[], int lim);
/* Read lines with Getline() until it returns 0 or less, echoing each line
   followed by a right-aligned "length: " label and the returned length. */
int main()
{
    char buffer[MAXLINE];

    for (;;) {
        int count = Getline(buffer, MAXLINE);
        if (count <= 0)
            break;
        printf("%s%15s%d\n", buffer, "length: ", count);
    }
    return 0;
}
/*
 * Getline: read a line into s (at most lim - 1 characters) using nested ifs
 * and a flag instead of && and ||. A newline that ends the line is stored.
 * Returns the value of i after the loop, plus one when a newline was stored.
 */
int Getline(char s[], int lim)
{
/* flag stays 1 while reading should continue; any stop condition clears it */
int flag = 1;
int i = 0, c = 0;
/* NOTE(review): the for-loop's ++i also runs on the pass that clears flag,
   so i ends up one past the number of characters stored and the function
   cannot return 0, not even at EOF. */
for (i = 0; flag == 1; ++i) {
if (i < lim - 1) {
if ((c = getchar()) != '\n') {
if (c != EOF) {
;
}
else {
flag = 0;
}
}
else {
flag = 0;
}
}
else {
flag = 0;
}
/* store the character only when none of the stop conditions fired */
if (flag == 1) {
s[i] = c;
}
}
if (c == '\n') {
s[i] = c;
++i;
}
s[i] = '\0';
return i;
}
This program is wrong...in a weird way.
I run this code with redirection like
./2-2 <in
with the in file
Get this line.
Then the output to the screen is countless
G length: 1
It looks like the program stuck in a loop. But when I stop using redirection and just type Get this line. to the terminal, though it is still wrong, the countless output disappeared. Why?
The problem is here:
for (i = 0; flag == 1; ++i) {
^^^
i will always increment to at least 1
before the for-loop ends
so your function will never return 0
Instead of incrementing in a for-loop, only increment after inserting a new element. Like
if (flag == 1) {
s[i] = c;
++i;
}
Instead of a for-loop, you could use a while loop, like:
int i = 0;
while (flag == 1)
{
...
}
The next step is to get rid of the flag and use break instead. Like:
int i = 0;
while (1)
{
if (i >= lim - 1) break;
...
}
Your code will be much shorter and easier to read.
You have also complicated your function quite a bit. If you simply want to get the line redirected from the file, store it in line and ensure it is nul-terminated (and without the trailing '\n' - which you shouldn't leave dangling off strings you store), you could do something quite simple like:
/*
 * Getline: read one line from standard input into s.
 *
 * s    destination buffer
 * lim  number of bytes available in s
 *
 * Stores at most lim - 1 characters and nul-terminates. The newline that
 * ends the line is consumed but not stored. Returns the number of characters
 * stored. Note that an empty line and end of input both give 0.
 * When lim <= 0 nothing is read or written and 0 is returned.
 */
int Getline (char *s, int lim)
{
    int i = 0;                          /* char count - length */

    if (lim <= 0)                       /* no room even for the nul */
        return 0;

    while (i < lim - 1) {               /* loop while chars fit */
        int c = getchar();
        /* Both EOF and newline end the line. Skipping the newline and
           carrying on would glue the following lines onto this one. */
        if (c == EOF || c == '\n')
            goto done;
        s[i++] = c;                     /* good char, increment len */
    }
done:;
    s[i] = 0;                           /* nul-terminate */
    return i;                           /* return length */
}
(note: from your comment about not having used break before, then a simple goto works just as well)
Example Use/Output
Given your file containing the line "Get this line."
$ ./bin/equivloop <dat/in
Get this line. length: 14
(note: if you store the newline, then the length would be 15 and that output would be on the next line)
#include <stdio.h>
#define MAXLINE 1024
int getline(char s[], int lim);
/* Read lines with getline() and print each one with its trailing blanks and
   tabs removed; a line with nothing left before the newline is not printed. */
main()
{
int i, len;
char line[MAXLINE];
while ((len = getline(line, MAXLINE)) > 0) {
/* line[len - 1] is the last stored character (the newline when one was
   read), so the backward scan starts at the character before it. */
i = len - 2;
while (i >= 0 && (line[i] == ' ' || line[i] == '\t'))
--i;
/* i < 0 means only blanks and tabs preceded the end of the line: skip it.
   Otherwise end the string right after the last character kept. */
if (i >= 0) {
line[i+1] = '\n';
line[i+2] = '\0';
printf("%s", line);
}
}
return 0;
}
/*
 * getline: read one line from standard input into s.
 *
 * s    destination buffer
 * lim  number of bytes available in s
 *
 * Stores at most lim - 1 characters followed by '\0'. A newline that ends
 * the line is stored and counted. Returns the number of characters stored,
 * or 0 at end of input. When lim <= 0 nothing is read or written.
 */
int getline(char s[], int lim)
{
    /* c is initialised because the loop below never calls getchar() when
       lim <= 1, yet c is still examined afterwards. */
    int c = 0;
    int i = 0;

    if (lim <= 0)
        return 0;

    while (i < lim - 1 && (c = getchar()) != EOF && c != '\n')
        s[i++] = c;
    if (c == '\n')
        s[i++] = c;
    s[i] = '\0';
    return i;
}
This is the program I tried, but it seems to display the same input without any changes. I can't understand what mistake I made ....
Thanks in advance.
Your getline function increments i (the length returned) for the '\n', but not for the terminating '\0'.
Hence, in i = len - 2; you should be subtracting one, not two... right? To clarify, I'm suggesting that you try i = len - 1;!
Here is a couple of hopefully helpful hints.
There is already a function getline(), you are overruling that name. If you increase your compilation warning level then you'll probably see the warning. Anyway, assuming you want to have your own line-reading function, you could change it to say my_getline(). If you do this, then your program works for me.
Similarly, your declaration of main() doesn't have the type in front of it as in int main(). That's OK, the default type is int, but for neatness' sake, you should put the type in there (again, to avoid compilation warnings when you increase the warning level).
Your program can't handle lines longer than 1024 characters. That's ok if you can live with this limitation. If you would want to get rid of this limitation, then you'd want to wire my_getline() in such a way that it would return a pointer to allocated memory holding the entire line of text that it read. In main() you could strip your trailing space etc. and then free() the memory. In this case the prototype of my_getline() would be of course char *my_getline().
Check the number of characters after removing the white-space characters (blanks, tabs), then compare it with the number of characters before removing them. If they differ, your program works.
You will not be able to see the difference between your input and output, since we are dealing with white-space characters here.
The following solution works for me. Please note that I have not defined any new functions.
#include <stdio.h>
#define MAXLINE 1000
#define INLINE 1
#define OUTLINE 0
/* Strip trailing blanks and tabs from the first len characters of line and
   print what is left followed by a newline; print nothing if nothing is left. */
static void emit_stripped(char line[], int len)
{
    while (len > 0 && (line[len - 1] == ' ' || line[len - 1] == '\t'))
        --len;
    if (len > 0) {
        line[len] = '\0';
        printf("%s\n", line);
    }
}

/*
 * Copy standard input to standard output one line at a time, removing
 * trailing blanks and tabs from every line and dropping lines that contain
 * nothing else. Blanks inside a line are kept: only the run that directly
 * precedes the newline is removed. Characters beyond MAXLINE - 1 on a single
 * line are discarded instead of being written past the end of the buffer.
 */
int main(void) {
    char line[MAXLINE];
    int len = 0;    /* characters buffered for the current line */
    int c;

    while ((c = getchar()) != EOF) {
        if (c != '\n') {
            if (len < MAXLINE - 1)
                line[len++] = (char)c;
            continue;
        }
        emit_stripped(line, len);
        len = 0;    /* start the next line from a clean state */
    }
    /* a final line that is not newline-terminated is still processed */
    emit_stripped(line, len);
    return 0;
}
I have been learning from the C Programming Language book (K&R) and was writing one of the exercises that removes trailing blanks from an input. I understand that a segmentation fault is at some level a problem having to do with accessing memory that is not accessible, but I have read through this code several times and can't find the error. I would like it very much if someone could help find this error and tell me how to discover errors like this in the future.
#include <stdio.h>
#define MAXLINE 1000
#define CHAR 0 /*character definition*/
#define TRAIL 1 /*determines whether program is in a trailing blank*/
int getinput(char input[], int max);
int trailrem(char input[], char copyto[]);
int len;
/* Read lines with getinput(), pass each one to trailrem(), and print the
   characters of newline[] one per output line. */
int main() {
char line[MAXLINE]; /*current line*/
char newline[MAXLINE];
int i, c, newreturn; /*integer counter, character holder, current line length, and trailrem return value*/
/* NOTE(review): this local len hides the file-scope len declared above, so
   the assignment in the loop condition below updates only the local one. */
int len;
while((len = getinput(line, MAXLINE)) > 0) {
newreturn = trailrem(line, newline);
/* prints indices 0 through newreturn inclusive */
for(i = 0; i <= newreturn; ++i)
printf("\n%c\n", newline[i]);
}
}
/*
 * getinput: read characters from standard input into input until EOF or a
 * newline. A newline is stored, '\0' is appended, and the number of
 * characters stored is returned.
 */
int getinput(char input[],int max) {
int i, c, line;
/* NOTE(review): the third test compares the character value c, not the index
   i, with max - 1, so the index is never checked against the buffer size. */
for(i = 0; (c = getchar()) != EOF && c != '\n' && c < (max-1); ++i)
input[i] = c;
if(c == '\n') {
input[i] = c;
++i;
}
input[i] = '\0';
return i;
}
/*
 * trailrem: walks input backwards from index len, counting blanks and tabs
 * seen while in the TRAIL state, then writes into copy. Returns r.
 * Presumably meant to produce a copy of input without trailing blanks;
 * see the review notes below.
 */
int trailrem(char input[], char copy[]) {
/* NOTE(review): minusin, state and r have no initial value; minusin is
   incremented and state is compared below before either is assigned on
   every path. */
int i, j, minusin, state, r;
/* NOTE(review): len here is the file-scope variable, not a parameter. */
for(i = len; input[i] != EOF && i >= 0; --i) {
if(input[i] =='\n')
state = TRAIL;
else if((input[i] == ' ' && state == TRAIL) ||( input[i] == '\t' && state == TRAIL))
++minusin;
else if(state == TRAIL && (input[i] != ' ' || input[i] != '\t'))
state = CHAR;
/* NOTE(review): nothing in this inner loop changes state, so once it is
   entered with state == CHAR the loop condition never becomes false while j
   keeps decreasing. */
for(j = (r = len-minusin); state == CHAR; --j){
copy[j-2] = input[i];
}
}
copy[r] = '\0';
copy[r-1] = '\n';
return r;
}
So many problems in your code. But the main problem is, you have a global len
int len;
And a local len in the main function.
You are initializing len in main function like this:
while((len = getinput(line, MAXLINE)) > 0)
So the local len is updated. But the global len is still 0.
You are expecting that, you will get the updated value of len in trailrem method but you don't. In trailrem() you will get len equal to 0!
for(i = len; input[i] != EOF && i >= 0; --i)
So i is 0 too. And hence, copy[r-1] = '\n'; will crash, because r-1 can be negative.
Other problems: (BLUEPIXY and WhozCraig mentioned in the comment).
for(i = 0; (c = getchar()) != EOF && c != '\n' && c < (max-1); ++i)
here, c < (max-1) should be i < (max-1).
++minusin; in trailrem function where minusin is uninitialized.
I'm on to K&R's Exercise 1-18
Write a program to remove trailing blanks and tabs from each line of input, and to delete entirely blank lines.
This is what I've come up with so far
#include <stdio.h>
#define MAXLINE 1000
int getline(char line[], int maxline);
void copy(char to[], char from[]);
/* Echo every line returned by getline() until it reports a length of 0 or
   less. */
int main () {
    char buffer[MAXLINE];

    for (;;) {
        if (getline(buffer, MAXLINE) <= 0)
            break;
        printf("%s", buffer);
    }
    return 0;
}
/*
 * getline: read a line into s (at most lim - 1 characters from the input)
 * and cut it back to just after its last non-blank, non-tab character.
 * A '\n' is appended when the line ended with a newline or when anything
 * was cut off. Returns the resulting length.
 */
int getline(char s[], int lim) {
    int ch;
    int pos = 0;   /* next free slot while reading */
    int keep = 0;  /* one past the last non-blank character seen */

    while (pos < lim - 1 && (ch = getchar()) != EOF && ch != '\n') {
        s[pos++] = ch;
        if (!(ch == ' ' || ch == '\t'))
            keep = pos;
    }
    /* trailing blanks were read, or the line ended normally: end it at keep */
    if (pos != keep || ch == '\n')
        s[keep++] = '\n';
    s[keep] = '\0';
    return keep;
}
The second part sounded hard, as I wasn't sure what I should return if the line only has blanks or tabs. After all, if I return 0, it will halt the getline() calling. Would this be where I should set up a #define, such as ALL_BLANKS.
Anyway, to actual main question, is this a correct way to remove trailing blanks and tabs from lines? I ran a few inputs through, and it seemed to work. However, if I copy and paste text with newlines into the CL, it appears all strung together. And when I type a line into the CL and push enter, it automatically prints it. Should I be building an array of lines, and then looping through and printing them when done ?
Your code looks correct, but I think it would be better if you separate the operations of reading a line from stdin and stripping the line of trailing whitespace (decoupling). Then you can use the unmodified getline from the book (code reuse) and won't have the problem of halting on returning 0.
And if you are interested in other solutions, the CLC-wiki has an almost complete list of K&R2 solutions.
#include <stdio.h>
#define MAXLINE 1024
int getline(char s[], int lim);
/*
 * Read lines with getline() and print each one with trailing blanks and tabs
 * removed; lines that contain nothing else are dropped.
 *
 * The newline is removed only when it is actually present. A chunk without
 * one (last line of a file with no final newline, or a line longer than the
 * buffer) keeps its last character instead of having it overwritten.
 */
int main(void)
{
    char line[MAXLINE];
    int len;

    while ((len = getline(line, MAXLINE)) > 0) {
        int end = len;  /* one past the last character to keep */

        if (line[end - 1] == '\n')
            --end;
        while (end > 0 && (line[end - 1] == ' ' || line[end - 1] == '\t'))
            --end;
        if (end > 0) {
            line[end] = '\0';
            printf("%s\n", line);
        }
    }
    return 0;
}
This is the category 1 solution I wrote some time ago. getline is as on page 28 of the book. It might be nicer to put the removal of whitespace in a separate function rstrip, but I leave this as an exercise for the reader.
Your basic design is sound. It is better, as you did, to print a stripped line as soon as you've built it, so that your program only needs to keep one line at a time in memory and not the whole file.
There is a small problem with your code: it doesn't implement the second part of the question (“delete entirely blank line”). That's because you always tack a '\n' at the end of the string. This is easy to fix, but remember that you must return a nonzero value to your caller since a blank line doesn't indicate the end of the file.
getline should return -1 (a negative value in general) if there is an error or if EOF is reached. Then your loop conditional can check that it returns something >= 0 and still allow for 0 length lines.
for (i=0; i < lim - 1 && (c = getchar()) != EOF && c != '\n'; ++i) {
I almost never include an assignment within a loop conditional. I would rather add 10 lines of code to get around doing that because it's difficult to read. I would especially refrain from using them with complicated conditionals.
int i = 0;
/* Stop at lim - 1 so the terminator written after the loop still lands
   inside the buffer (assuming lim is the size of line, as in the
   getline(char s[], int lim) this fragment rewrites). */
while (i < lim - 1) {
    c = getchar();
    if (c == EOF || c == '\n') {
        break;
    }
    line[i] = (char)c;
    i++;
}
line[i] = '\0'; // Null terminate the string
This code should read in a line for you. I would separate the reading in of the line from the removal of the trailing white space. You could very easily work backwards from the end of the string to remove white spaces at the location where I null terminated the line, since after having read in the line you now know its length. Essentially you grow the string and then you prune it back down after it has finished growing.
This is how i did it.
#include <stdio.h>
#define MAXLINE 1000
#define IN 1
#define OUT 0
int state = OUT;
int getline(char s[], int lim);
void copy(char to[], char from[]);
/* Read every line with getline(), remember the one with the greatest
   returned length, and print it (surrounded by newlines) if there was one. */
int main(void)
{
    char current[MAXLINE];
    char best[MAXLINE];
    int best_len = 0;
    int n;

    for (n = getline(current, MAXLINE); n > 0; n = getline(current, MAXLINE)) {
        if (n > best_len) {
            best_len = n;
            copy(best, current);
        }
    }
    if (best_len > 0) {
        printf("\n%s\n", best);
    }
    return 0;
}
/*
 * getline: read a line into s while squeezing blanks and tabs.
 * A blank or tab is written to s[i] but i is stepped back, so the next
 * character overwrites it; the file-scope `state` records that a single ' '
 * is owed, and that space is written just before the next non-blank
 * character. A newline that ends the line is stored. Returns the length.
 */
int getline(char s[], int lim)
{
int i, c;
for (i = 0; i < lim - 1 && ((c = getchar()) != EOF) && (c != '\n'); i++)
{
/* first non-blank after a run of blanks: emit the single pending space */
/* NOTE(review): this extra i++ is not re-checked against lim - 1 before the
   stores below, so i can reach lim by the time '\0' is written. */
if (state == IN && c != ' ' && c != '\t')
{
s[i] = ' ';
i++;
state = OUT;
}
/* NOTE(review): s[0] is examined here before the current character has been
   stored; on the very first pass nothing has been written to it yet. */
if (s[0] == ' ')
{
s[0] = '\b';
}
s[i] = c;
/* cancel the loop's upcoming i++ so the blank just stored is overwritten */
if (c == ' ' || c == '\t')
{
i--;
state = IN;
}
}
if (c == '\n')
{
s[i] = c;
i++;
}
s[i] = '\0';
return i;
}
/* copy: copy the string in from, terminator included, into to.
   The caller must make sure to is large enough. */
void copy(char to[], char from[])
{
    char *dst = to;
    const char *src = from;

    do {
        *dst = *src++;
    } while (*dst++ != '\0');
}
#include <stdio.h>
#include <string.h>
#define MAXLINE 1000
/*
 * getline: read the next non-blank line from standard input into s.
 *
 * s    destination buffer
 * lim  number of bytes available in s
 *
 * Trailing blanks, tabs and the newline are removed. Lines that are empty
 * after stripping are skipped, so a return value of 0 always means "no more
 * input" and never "blank line". Returns the length of the stripped line.
 * With lim < 2 there is no room for any character and 0 is returned.
 */
size_t getline(char *s,size_t lim)
{
    if (lim < 2) {
        if (lim == 1)
            s[0] = '\0';
        return 0;
    }

    while (fgets(s, (int)lim, stdin) != NULL) {
        size_t len = strlen(s);  /* measured once, then only decremented */

        while (len > 0 &&
               (s[len - 1] == ' ' || s[len - 1] == '\t' || s[len - 1] == '\n'))
            --len;
        s[len] = '\0';
        if (len > 0)
            return len;
    }
    return 0;
}
/*
 * Print every line returned by getline(). The lines arrive without their
 * newline, so one is written after each of them to keep the output from
 * running together.
 */
int main(void)
{
    char line[MAXLINE];

    while (getline(line, sizeof line)) {
        printf("%s\n", line);
    }
    return 0;
}