/* stringlength
* input: str, pointer to a string
* output: integer representing the length of string str,
* not counting the terminating character.
*
* You may NOT call ANY functions within this function.
*/
int stringlength(char *str)
{
// count the number of characters in str
int count=0,k;
for (k=0; str[k] != '\0';k++)
count++;
return count;
}
/* countchars
* inputs: character c, string str
* output: The number of instances of c in the string str
* You may not call ANY function calls within this function.
*/
int countchars(char c, char *str)
{
// count the number of times c is found in str
int k,count=0;
for (k=0;str[k]=='\0';k++)
{
if (str[k] == c)
count++;
else;
}
return count;
}
/* countlines
* input: char *filename - string containing the filename
* output: integer representing the number of lines in the file
*/
int countlines(char *filename)
{
// count the number of lines in the file called filename
FILE *f = fopen(filename,"r");
char ch;
int lines=0;
f = fopen(filename,"r");
do{
ch = fgetc(f);
if( ch == '\n')
lines++;
}while( ch != EOF );
return lines;
}
I need help with these three different functions that I am implementing in my program. I am a beginner so go easy on me, the countlines function is giving me the most trouble. If anyone could explain why not or why these functions will work, it would be greatly appreciated.
There are a number of problems in countlines():
You open the file twice, but overwrite the first FILE * value with the second, so there's no way you can close it. This is a minor problem.
The major problem is that the function fgetc() returns an int, not a char. In particular, EOF is a value different from every char.
The code does not close the file before returning. Generally, if you open a file in a function, then you should close it. If you don't, you have to pass the file pointer back to the calling code so that it can close it.
The do ... while loop is seldom correct for an input loop (a while loop testing at the top is almost always much cleaner and clearer) but at least you weren't using feof().
int countlines(char *filename)
{
FILE *fp = fopen(filename,"r");
int ch;
int lines = 0;
if (fp == 0)
return lines;
while ((ch = fgetc(fp)) != EOF)
{
if (ch == '\n')
lines++;
}
fclose(fp);
return lines;
}
When you use char instead, one of two things happens:
If your char type is signed, then a real character (often ÿ — y-umlaut, U+00FF, LATIN SMALL LETTER Y WITH DIAERESIS) also matches EOF so you can stop reading before you reach end of file.
If your char type is unsigned, no value will ever match EOF so the loop will never stop.
In stringlength(), you have two variables count and k that are carefully kept at the same value; you only need one of the two.
Apart from raggedy indentation (endemic in the code shown — and definitely something to be avoided), and the unnecessary and pointless else; which does absolutely nothing, the code for countchars() looks OK (late addition) ... has the condition in the for loop inverted; it should be str[k] != '\0', of course.
Related
I am making a simple program to read from a file character by character, puts them into tmp and then puts tmp in input[i]. However, the program saves a character in tmp and then saves the next character in input[i]. How do I make it not skip that first character?
I've tried to read into input[i] right away but then I wasn't able to check for EOF flag.
FILE * file = fopen("input.txt", "r");
char tmp;
char input[5];
tmp= getc(file);
input[0]= tmp;
int i=0;
while((tmp != ' ') && (tmp != '\n') && (tmp != EOF)){
tmp= getc(file);
input[i]=tmp;
length++;
i++;
}
printf("%s",input);
It's supposed to print "ADD $02", but instead it prints "DD 02".
You are doing things in the wrong order in your code: The way your code is structures, reading and storing the first char is moved out of the loop. In the loop, that char is then overwritten. In that case start with i = 1.
Perhaps you want to read the first character anyway, but I guess you want to read everything up to the first space, which might be the first character. Then do this:
#include <stdio.h>
int main(void)
{
char input[80];
int i = 0;
int c = getchar();
while (c != ' ' && c != '\n' && c != EOF) {
if (i + 1 < sizeof(input)) { // store char if the is room
input[i++] = c;
}
c = getchar();
}
input[i] = '\0'; // null-terminate input
puts(input);
return 0;
}
Things to note:
The first character is read before the loop. the loop condition and the code that stores the char then use that char. Just before the end of the loop body, the next char is read, which will then be processed in the next iteration.
You don't enforce that the char buffer input cannot be overwritten. This is dangerous, especially since your buffer is tiny.
When you construct strings char by char, you should null-terminate it by placing an explicit '\0' at the end. You have to make sure that there is space for that terminator. Nearly all system functions like puts or printf("%s", ...) expect the string to be null-terminated.
Make the result of getchar an int, so that you can distinguish between all valid character codes and the special value EOF.
The code above is useful if the first and subsequent calls to get the next item are different, for example when tokenizing a string with strtok. Here, you can also choose another approach:
while (1) { // "infinite loop"
int c = getchar(); // read a char first thing in a loop
if (c == ' ' || c == '\n' || c == EOF) break;
// explicit break when done
if (i + 1 < sizeof(input)) {
input[i++] = c;
}
}
This approach has the logic of processing the chars in the loop body only, but you must wrap it in an infinite loop and then use the explicit break.
I wanted to only count the number of strings in a text file, containing numbers as well. But the code below, counts even the numbers in the file as strings. How do I rectify the problem?
int count;
char *temp;
FILE *fp;
fp = fopen("multiplexyz.txt" ,"r" );
while(fscanf(fp,"%s",temp) != EOF )
{
count++;
}
printf("%d ",count);
return 0;
}
Well, first up, using the temp pointer without having backing storage for it is going to cause you a world of pain.
I'd suggest, as a start, using something like char temp[1000] instead, keeping in mind that's still a bit risky if you have words more than a thousand or so characters long (that's a different issue to the one you're asking about so I'll mention it but not spend too much time on fixing it).
Secondly, it appears you want to count words with numbers (like alpha7 or pi/2). If that's the case, you simply need to check temp after reading the "word" and increment count only if it matches a "non-numeric" pattern.
That could be as simple as just not incrementing if the word consists only of digits, or it could be complicated if you want to handle decimals, exponential formats and so on.
But the bottom line remains the same:
while(fscanf(fp,"%s",temp) != EOF )
{
if (! isANumber(temp))
count++;
}
with a suitable definition of isANumber. For example, for unsigned integers only, something like this would be a good start:
int isANumber (char *str) {
// Empty string is not a number.
if (*str == '\0')
return 0;
// Check every character.
while (*str != '\0') {
// If non-digit, it's not a number.
if (! isdigit (*str))
return 0;
str++;
}
// If all characters were digits, it was a number.
return 1;
}
For more complex checking, you can use the strto* calls in C, giving them the temp buffer and ensuring you use the endptr method to ensure the entire string is scanned. Off the top of my head, so not well tested, that would go something like:
int isANumber (char *str) {
// Empty string is not a number.
if (*str == '\0')
return 0;
// Use strtod to get a double.
char *endPtr;
long double d = strtold (str, &endPtr);
// Characters unconsumed, not number (things like 42b).
if (*endPtr != '\0')
return 0;
// Was a long double, so number.
return 1;
}
The only thing you need to watch out for there is that certain strings like NaN or +Inf are considered a number by strtold so you may need extra checks for that.
inside your while loop, loop through the string to check if any of its characters are digits. Something like:
while(*temp != '\0'){
if(isnumber(*temp))
break;
}
[dont copy exact same code]
I find strpbrk to be one of the most helpful function to search for several needles in a haystack. Your set of needles being the numeric characters "0123456789" which if present in a line read from your file will count as a line. I also prefer POSIX getline for a line count do to its proper handling of files with non-POSIX line endings for the last line (both fgets and wc -l omit text (and a count) of the last line if it does not contain a POSIX line end ('\n'). That said, a small function that searches a line for characters contained in a trm passed as a parameter could be written as:
/** open and read each line in 'fn' returning the number of lines
* continaing any of the characters in 'trm'.
*/
size_t nlines (char *fn, char *trm)
{
if (!fn) return 0;
size_t lines = 0, n = 0;
char *buf = NULL;
FILE *fp = fopen (fn, "r");
if (!fp) return 0;
while (getline (&buf, &n, fp) != -1)
if (strpbrk (buf, trm))
lines++;
fclose (fp);
free (buf);
return lines;
}
Simply pass the filename of interest and the terms to search for in each line. A short test code with a default term of "0123456789" that takes the filename as the first parameter and the term as the second could be written as follows:
#include <stdio.h> /* printf */
#include <stdlib.h> /* free */
#include <string.h> /* strlen, strrchr */
size_t nlines (char *fn, char *trm);
int main (int argc, char **argv) {
char *fn = argc > 1 ? argv[1] : NULL;
char *srch = argc > 2 ? argv[2] : "0123456789";
if (!fn) return 1;
printf ("%zu %s\n", nlines (fn, srch), fn);
return 0;
}
/** open and read each line in 'fn' returning the number of lines
* continaing any of the characters in 'trm'.
*/
size_t nlines (char *fn, char *trm)
{
if (!fn) return 0;
size_t lines = 0, n = 0;
char *buf = NULL;
FILE *fp = fopen (fn, "r");
if (!fp) return 0;
while (getline (&buf, &n, fp) != -1)
if (strpbrk (buf, trm))
lines++;
fclose (fp);
free (buf);
return lines;
}
Give it a try and see if this is what you are expecting, if not, just let me know and I am glad to help further.
Example Input File
$ cat dat/linewno.txt
The quick brown fox
jumps over 3 lazy dogs
who sleep in the sun
with a temp of 101
Example Use/Output
$ ./bin/getline_nlines_nums dat/linewno.txt
2 dat/linewno.txt
$ wc -l dat/linewno.txt
4 dat/linewno.txt
I want program count lines in text file by function. It used to work ,but it always return 0 now.
What am I doing wrong?
#include <stdio.h>
int couLineF(FILE* fp){ //count lines in file
int count = 0,ch;
while((ch = fgetc(fp)) != EOF){
if(ch == (int)"\n" ) count++;
}
rewind(fp);
return count;
}
int main(){
FILE *fp = fopen("book.txt","r");
int lines;
if(fp){
lines = couLineF(fp);
printf("number of lines is : %d",lines);
}
return 0;
}
Another question
Are there any other ways to get number of lines in text file?
Your problem is here:
if(ch == (int)"\n" )
You are casting the address of "\n", a string literal, into an int and comparing it with ch. This doesn't make any sense.
Replace it with
if(ch == '\n' )
to fix it. This checks if ch is a newline character.(Use single quotes(') for denoting a character and double quotes(") for a string)
Other problems are:
Not closing the file using fclose if fopen was successful.
Your program won't count the last line if it doesn't end with \n.
There is absolutely no reason to use rewind(fp) as you never use the FILE pointer again.
I have an input file I need to extract words from. The words can only contain letters and numbers so anything else will be treated as a delimiter. I tried fscanf,fgets+sscanf and strtok but nothing seems to work.
while(!feof(file))
{
fscanf(file,"%s",string);
printf("%s\n",string);
}
Above one clearly doesn't work because it doesn't use any delimiters so I replaced the line with this:
fscanf(file,"%[A-z]",string);
It reads the first word fine but the file pointer keeps rewinding so it reads the first word over and over.
So I used fgets to read the first line and use sscanf:
sscanf(line,"%[A-z]%n,word,len);
line+=len;
This one doesn't work either because whatever I try I can't move the pointer to the right place. I tried strtok but I can't find how to set delimitters
while(p != NULL) {
printf("%s\n", p);
p = strtok(NULL, " ");
This one obviously take blank character as a delimitter but I have literally 100s of delimitters.
Am I missing something here becasue extracting words from a file seemed a simple concept at first but nothing I try really works?
Consider building a minimal lexer. When in state word it would remain in it as long as it sees letters and numbers. It would switch to state delimiter when encountering something else. Then it could do an exact opposite in the state delimiter.
Here's an example of a simple state machine which might be helpful. For the sake of brevity it works only with digits. echo "2341,452(42 555" | ./main will print each number in a separate line. It's not a lexer but the idea of switching between states is quite similar.
#include <stdio.h>
#include <string.h>
int main() {
static const int WORD = 1, DELIM = 2, BUFLEN = 1024;
int state = WORD, ptr = 0;
char buffer[BUFLEN], *digits = "1234567890";
while ((c = getchar()) != EOF) {
if (strchr(digits, c)) {
if (WORD == state) {
buffer[ptr++] = c;
} else {
buffer[0] = c;
ptr = 1;
}
state = WORD;
} else {
if (WORD == state) {
buffer[ptr] = '\0';
printf("%s\n", buffer);
}
state = DELIM;
}
}
return 0;
}
If the number of states increases you can consider replacing if statements checking the current state with switch blocks. The performance can be increased by replacing getchar with reading a whole block of the input to a temporary buffer and iterating through it.
In case of having to deal with a more complex input file format you can use lexical analysers generators such as flex. They can do the job of defining state transitions and other parts of lexer generation for you.
Several points:
First of all, do not use feof(file) as your loop condition; feof won't return true until after you attempt to read past the end of the file, so your loop will execute once too often.
Second, you mentioned this:
fscanf(file,"%[A-z]",string);
It reads the first word fine but the file pointer keeps rewinding so it reads the first word over and over.
That's not quite what's happening; if the next character in the stream doesn't match the format specifier, scanf returns without having read anything, and string is unmodified.
Here's a simple, if inelegant, method: it reads one character at a time from the input file, checks to see if it's either an alpha or a digit, and if it is, adds it to a string.
#include <stdio.h>
#include <ctype.h>
int get_next_word(FILE *file, char *word, size_t wordSize)
{
size_t i = 0;
int c;
/**
* Skip over any non-alphanumeric characters
*/
while ((c = fgetc(file)) != EOF && !isalnum(c))
; // empty loop
if (c != EOF)
word[i++] = c;
/**
* Read up to the next non-alphanumeric character and
* store it to word
*/
while ((c = fgetc(file)) != EOF && i < (wordSize - 1) && isalnum(c))
{
word[i++] = c;
}
word[i] = 0;
return c != EOF;
}
int main(void)
{
char word[SIZE]; // where SIZE is large enough to handle expected inputs
FILE *file;
...
while (get_next_word(file, word, sizeof word))
// do something with word
...
}
I would use:
FILE *file;
char string[200];
while(fscanf(file, "%*[^A-Za-z]"), fscanf(file, "%199[a-zA-Z]", string) > 0) {
/* do something with string... */
}
This skips over non-letters and then reads a string of up to 199 letters. The only oddness is that if you have any 'words' that are longer than 199 letters they'll be split up into multiple words, but you need the limit to avoid a buffer overflow...
What are your delimiters? The second argument to strtok should be a string containing your delimiters, and the first should be a pointer to your string the first time round then NULL afterwards:
char * p = strtok(line, ","); // assuming a , delimiter
printf("%s\n", p);
while(p)
{
p = strtok(NULL, ",");
printf("%S\n", p);
}
Which is the fastest way to get the lines of an ASCII file?
Normally you read files in C using fgets. You can also use scanf("%[^\n]"), but quite a few people reading the code are likely to find that confusing and foreign.
Edit: on the other hand, if you really do just want to count lines, a slightly modified version of the scanf approach can work quite nicely:
while (EOF != (scanf("%*[^\n]"), scanf("%*c")))
++lines;
The advantage of this is that with the '*' in each conversion, scanf reads and matches the input, but does nothing with the result. That means we don't have to waste memory on a large buffer to hold the content of a line that we don't care about (and still take a chance of getting a line that's even larger than that, so our count ends up wrong unless we got to even more work to figure out whether the input we read ended with a newline).
Unfortunately, we do have to break up the scanf into two pieces like this. scanf stops scanning when a conversion fails, and if the input contains a blank line (two consecutive newlines) we expect the first conversion to fail. Even if that fails, however, we want the second conversion to happen, to read the next newline and move on to the next line. Therefore, we attempt the first conversion to "eat" the content of the line, and then do the %c conversion to read the newline (the part we really care about). We continue doing both until the second call to scanf returns EOF (which will normally be at the end of the file, though it can also happen in case of something like a read error).
Edit2: Of course, there is another possibility that's (at least arguably) simpler and easier to understand:
int ch;
while (EOF != (ch=getchar()))
if (ch=='\n')
++lines;
The only part of this that some people find counterintuitive is that ch must be defined as an int, not a char for the code to work correctly.
Here's a solution based on fgetc() which will work for lines of any length and doesn't require you to allocate a buffer.
#include <stdio.h>
int main()
{
FILE *fp = stdin; /* or use fopen to open a file */
int c; /* Nb. int (not char) for the EOF */
unsigned long newline_count = 0;
/* count the newline characters */
while ( (c=fgetc(fp)) != EOF ) {
if ( c == '\n' )
newline_count++;
}
printf("%lu newline characters\n", newline_count);
return 0;
}
Maybe I'm missing something, but why not simply:
#include <stdio.h>
int main(void) {
int n = 0;
int c;
while ((c = getchar()) != EOF) {
if (c == '\n')
++n;
}
printf("%d\n", n);
}
if you want to count partial lines (i.e. [^\n]EOF):
#include <stdio.h>
int main(void) {
int n = 0;
int pc = EOF;
int c;
while ((c = getchar()) != EOF) {
if (c == '\n')
++n;
pc = c;
}
if (pc != EOF && pc != '\n')
++n;
printf("%d\n", n);
}
Common, why You compare all characters? It is very slow. In 10MB file it is ~3s.
Under solution is faster.
unsigned long count_lines_of_file(char *file_patch) {
FILE *fp = fopen(file_patch, "r");
unsigned long line_count = 0;
if(fp == NULL){
return 0;
}
while ( fgetline(fp) )
line_count++;
fclose(fp);
return line_count;
}
What about this?
#include <stdio.h>
#include <string.h>
#define BUFFER_SIZE 4096
int main(int argc, char** argv)
{
int count;
int bytes;
FILE* f;
char buffer[BUFFER_SIZE + 1];
char* ptr;
if (argc != 2 || !(f = fopen(argv[1], "r")))
{
return -1;
}
count = 0;
while(!feof(f))
{
bytes = fread(buffer, sizeof(char), BUFFER_SIZE, f);
if (bytes <= 0)
{
return -1;
}
buffer[bytes] = '\0';
for (ptr = buffer; ptr; ptr = strchr(ptr, '\n'))
{
++count;
++ptr;
}
}
fclose(f);
printf("%d\n", count - 1);
return 0;
}