Program containing fscanf fails to compile - c

As someone who is pretty new to C, I'm still trying to wrap my head around the massive amount of functions.
One in particular is giving me a lot of problems.
I'm trying to use fscanf but I keep getting a strange error:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char **argv)
{
FILE *dict = fopen(argv[1], "r");
// skim through every word in a dictionary
while (fscanf(dict, "%s", word) != EOF)
{
// iterate through every chracter in the word
for (int i = 0, int j = strlen(word); i < j; i++)
{
printf("%c", word[i]);
}
printf("\n");
}
return 1;
}
I'm trying to create a loop that skims through every single word in a file that is just a series of words.
I'm assuming that the code I've written does just that, storing each word in a string called "word".
Unfortunately, I keep getting this error:
error: format specifies type 'char *' but the argument has type
'<dependent type>' [-Werror,-Wformat]
error: use of undeclared identifier 'word'
Even if I try to initialize a char * variable "word", some other error just keeps popping up. Maybe I don't fully understand how fscanf works, could anyone provide some clearance?
How can I make the above program compile?

Because you are using C, this answer does not care about infamous "buffer overflow error" but please keep in mind that.
You need to declare 'word' variable before you use 'fscanf'. It looks like...
const size_t MAX_WORD_LEN=256;
char word[MAX_WORD_LEN];
then, you can.
while (fscanf(file, "%s", word) != EOF)
{
}
Here, the maximum 'word' length is 255 characters (without terminating NULL character). So in your file one single word length cannot be over 255 characters (in most cases, it should be fine).
The whole program most likely be as follow:
#include <stdio.h>
const char* const usage_msg = "Usage: program_name input-file\nInput file contains words in English or some languages. \n";
const char* const fopen_err = "\nUnable to open file: ";
const size_t MAX_WORD_LEN = 256;
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "%s", usage_msg);
return -1;
}
else {
/* if argv[1] contains something that is not supposed to be, it will be infamous buffer overflow. */
/* any validation on argv[1] is welcomed here */
FILE *pf = fopen(argv[1], "r");
char word[MAX_WORD_LEN];
if (pf == NULL) {
fprintf(stderr, "%s%s", fopen_err, argv[1]);
return -2;
}
/* overflow can be avoided by specifying width 255 to %s */
while (fscanf(pf, "%255s", word) != EOF) {
printf("%s\n", word);
}
}
return 0;
}

Related

How to use fscanf to read a text file including many words and store them into a string array by index

The wordlist.txt is including like:
able
army
bird
boring
sing
song
And I want to use fscanf() to read this txt file line by line and store them into a string array by indexed every word like this:
src = [able army bird boring sing song]
where src[0]= "able", src[1] = "army" and so on. But my code only outputs src[0] = "a", src[1] = "b"... Could someone help me figure out what's going wrong in my code:
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL)
{
printf("%s", "File open error");
return 0;
}
char src[1000];
for (int i = 0; i < sizeof(src); i++)
{
fscanf(fp, "%[^EOF]", &src[i]);
}
fclose(fp);
printf("%c", src[0]);
getchar();
return 0;
}
Pretty appreciated!
For example like this.
#include <stdio.h>
#include <string.h>
#include <errno.h>
#define MAX_ARRAY_SIZE 1000
#define MAX_STRING_SIZE 100
int main(int argc, char *argv[]) {
FILE *fp = fopen("wordlist.txt", "r");
if (fp == NULL) {
printf("File open error\n");
return 1;
}
char arr[MAX_ARRAY_SIZE][MAX_STRING_SIZE];
int index = 0;
while (1) {
int ret = fscanf(fp, "%s", arr[index]);
if (ret == EOF) break;
++index;
if (index == MAX_ARRAY_SIZE) break;
}
fclose(fp);
for (int i = 0; i < index; ++i) {
printf("%s\n", arr[i]);
}
getchar();
return 0;
}
Some notes:
If there is an error, it is better to return 1 and not 0, for 0 means successful execution.
For a char array, you use a pointer. For a string array, you use a double pointer. A bit tricky to get used to them, but they are handy.
Also, a check of the return value of the fscanf would be great.
For fixed size arrays, it is useful to define the sizes using #define so that it is easier to change later if you use it multiple times in the code.
It's reading file one character at a time, Which itself is 4 in size like we see sizeof('a') in word able. Same goes for 'b' and so on. So one approach you can use is to keep checking when there is a space or newline character so that we can save the data before these two things as a word and then combine these small arrays by adding spaces in between and concatenating them to get a single array.

Get the user to enter a name but using file stream *fp

I am a beginner in c so I have a problem with get the user to input last name, a comma & then first name. However it will pass to the function call
int get_name(FILE *fp)
in my main function. I have a problem either if I have to use the arguments parameters.
Example, main (int argc, char *argv[])) or just main (void))
and from what I have been searching so far, FILE*fp cannot get the user to enter from stdin it only use to open the file(?) BUT I am required to get the user to input from keyboard and pass to the function. I have written some codes. but they don't seem to work but I am going to put down on here the one I am sure that I need a few changes most.
#define LINESIZE1024
int main(void){
FILE *fp;
char line[LINESIZE];
char first;
char last;
char comma;
while(1){
if(!fgets(line,LINESIZE,stdin)){
clearerr(stdin);
break;
}
if(fp = (sscanf(line,"%s %s %s",&last,&comma,&first)==3))
get_name(fp);
if(get_last_first(fp)== -1)
break;
printf("Please enter first name a comma and then last name");
}
BUT I got an error saying I can't use pass it from pointer to an integer. and many MORE but I accidentally closed my concolse and all the errors that appeared while I was trying to fix are gone. So please give me some ideas.
What about seconde code
while(1){
if(!fgets(line,LINESIZE,fp)){
clearerr(stdin);
break;
}
if(sscanf(line,"%s %s %s",last,comma,first)==3)
get_last_first(fp);
return 0;
}
It gave me errors too. fp,last,first,comma used uninitialized in this function
OK so I think I have fixed the previous problem now. However it doesn't print the name back if the name is given correctly. Here is my fixed main code.
int main(void){
FILE *fp = stdin;
char line[LINESIZE];
char first[16];
char last[16];
while(1){
if(!fgets(line,LINESIZE,stdin)){
clearerr(stdin);
break;
}
if(sscanf(line,"%s ,%s",last,first)==2)
if(get_name(fp)==2)
printf("Your name is: %s %s\n", first, last);
}
return 0;
}
here is my function.
int get_name(FILE *fp){
char line[LINESIZE];
char last[16], first[16];
int n;
/* returns -1 if the input is not in the correct format
or the name is not valid */
if(fgets(line, LINESIZE, fp) == NULL) {
return -1;
}
/* returns 0 on EOF */
if((n = sscanf(line, " %[a-zA-Z-] , %[a-zA-Z-]", last, first)) == EOF) {
return 0;
}
/* prints the name if it's valid */
if((n = sscanf(line, " %[a-zA-Z-] , %[a-zA-Z-]", last, first)) == 2) {
return 2;
}
return 1;
}
I thank you people so much for taking time to read and help me. Please don't be mean :)
Seems that you are making it more complicated than needed. Don't call fgets and scanf in main. Only do that in the function get_name.
It can be something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LINESIZE 1024
int get_name(FILE *fp)
{
char line[LINESIZE];
char* t;
if(!fgets(line, LINESIZE,fp))
{
printf("Error reading input\n");
return 0;
}
t = strstr(line, ",");
if (t)
{
*t = '\0';
++t;
printf("First: %s - Last: %s\n", line, t);
return 2;
}
printf("Illegal input\n");
return 0;
}
int main(int argc, char **argv)
{
get_name(stdin);
return 0;
}
If you later decide that you want to read from a file, you can reuse the function get_name without changing it at all. All you need is to change main. Like:
int main(int argc, char **argv)
{
FILE* f = fopen("test.txt", "r");
if (f)
{
get_name(f);
fclose(f);
}
else
{
printf("Open file failed\n");
}
return 0;
}
If you want to read from the keyboard, read from stdin or use scanf, which internally reads from stdin. If you want to read from a file instead, use FILE *fp, but don't forget to open the file and check if it was successful (you'll find lots of tutorials for this).
Further, when reading in strings, you need an array of characters, not a single one. Note further, that scanf can already deal with formats like "everything that is not a ',' then a ',' then a string. Note that format "[^,]" means "any character except a ',':
So you could adapt the code as follows:
#define LINESIZE 1024
int main(void){
char line[LINESIZE];
char first[LINESIZE];
char last[LINESIZE];
while(fgets(line,LINESIZE,stdin)) {
if(sscanf(line,"%[^,],%s",last,first)==2) {
printf("Read in %s ... %s\n",last,first);
}
else {
printf("Please enter first name a comma and then last name");
}
}
return 0;
}
And if your professor is picky concerning the "use FILE*", you could write:
FILE *fp = stdin;
...
while(fgets(line,LINESIZE,fp)) {
...

Get the length of each line in file with C and write in output file

I am a biology student and I am trying to learn perl, python and C and also use the scripts in my work. So, I have a file as follows:
>sequence1
ATCGATCGATCG
>sequence2
AAAATTTT
>sequence3
CCCCGGGG
The output should look like this, that is the name of each sequence and the count of characters in each line and printing the total number of sequences in the end of the file.
sequence1 12
sequence2 8
sequence3 8
Total number of sequences = 3
I could make the perl and python scripts work, this is the python script as an example:
#!/usr/bin/python
import sys
my_file = open(sys.argv[1]) #open the file
my_output = open(sys.argv[2], "w") #open output file
total_sequence_counts = 0
for line in my_file:
if line.startswith(">"):
sequence_name = line.rstrip('\n').replace(">","")
total_sequence_counts += 1
continue
dna_length = len(line.rstrip('\n'))
my_output.write(sequence_name + " " + str(dna_length) + '\n')
my_output.write("Total number of sequences = " + str(total_sequence_counts) + '\n')
Now, I want to write the same script in C, this is what I have achieved so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
input = FILE *fopen(const char *filename, "r");
output = FILE *fopen(const char *filename, "w");
double total_sequence_counts = 0;
char sequence_name[];
char line [4095]; // set a temporary line length
char buffer = (char *) malloc (sizeof(line) +1); // allocate some memory
while (fgets(line, sizeof(line), filename) != NULL) { // read until new line character is not found in line
buffer = realloc(*buffer, strlen(line) + strlen(buffer) + 1); // realloc buffer to adjust buffer size
if (buffer == NULL) { // print error message if memory allocation fails
printf("\n Memory error");
return 0;
}
if (line[0] == ">") {
sequence_name = strcpy(sequence_name, &line[1]);
total_sequence_counts += 1
}
else {
double length = strlen(line);
fprintf(output, "%s \t %ld", sequence_name, length);
}
fprintf(output, "%s \t %ld", "Total number of sequences = ", total_sequence_counts);
}
int fclose(FILE *input); // when you are done working with a file, you should close it using this function.
return 0;
int fclose(FILE *output);
return 0;
}
But this code, of course is full of mistakes, my problem is that despite studying a lot, I still can't properly understand and use the memory allocation and pointers so I know I especially have mistakes in that part. It would be great if you could comment on my code and see how it can turn into a script that actually work. By the way, in my actual data, the length of each line is not defined so I need to use malloc and realloc for that purpose.
For a simple program like this, where you look at short lines one at a time, you shouldn't worry about dynamic memory allocation. It is probably good enough to use local buffers of a reasonable size.
Another thing is that C isn't particularly suited for quick-and-dirty string processing. For example, there isn't a strstrip function in the standard library. You usually end up implementing such behaviour yourself.
An example implementation looks like this:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
char line[MAXLEN]; /* Current line buffer */
char ref[MAXLEN] = ""; /* Sequence reference buffer */
int nseq = 0; /* Sequence counter */
if (argc != 3) {
fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
exit(1);
}
in = fopen(argv[1], "r");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s.\n", argv[1]);
exit(1);
}
out = fopen(argv[2], "w");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s for writing.\n", argv[2]);
exit(1);
}
while (fgets(line, sizeof(line), in)) {
int len = strlen(line);
/* Strip whitespace from end */
while (len > 0 && isspace(line[len - 1])) len--;
line[len] = '\0';
if (line[0] == '>') {
/* First char is '>': copy from second char in line */
strcpy(ref, line + 1);
} else {
/* Other lines are sequences */
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
A lot of code is about enforcing arguments and opening and closing files. (You could cut out a lot of code if you used stdin and stdout with file redirections.)
The core is the big while loop. Things to note:
fgets returns NULL on error or when the end of file is reached.
The first lines determine the length of the line and then remove white-space from the end.
It is not enough to decrement length, at the end the stripped string must be terminated with the null character '\0'
When you check the first character in the line, you should check against a char, not a string. In C, single and double quotes are not interchangeable. ">" is a string literal of two characters, '>' and the terminating '\0'.
When dealing with countable entities like chars in a string, use integer types, not floating-point numbers. (I've used (signed) int here, but because there can't be a negative number of chars in a line, it might have been better to have used an unsigned type.)
The notation line + 1 is equivalent to &line[1].
The code I've shown doesn't check that there is always one reference per sequence. I'll leave this as exercide to the reader.
For a beginner, this can be quite a lot to keep track of. For small text-processing tasks like yours, Python and Perl are definitely better suited.
Edit: The solution above won't work for long sequences; it is restricted to MAXLEN characters. But you don't need dynamic allocation if you only need the length, not the contents of the sequences.
Here's an updated version that doesn't read lines, but read characters instead. In '>' context, it stored the reference. Otherwise it just keeps a count:
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h> /* for isspace() */
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
int nseq = 0; /* Sequence counter */
char ref[MAXLEN]; /* Reference name */
in = fopen(argv[1], "r");
out = fopen(argv[2], "w");
/* Snip: Argument and file checking as above */
while (1) {
int c = getc(in);
if (c == EOF) break;
if (c == '>') {
int n = 0;
c = fgetc(in);
while (c != EOF && c != '\n') {
if (n < sizeof(ref) - 1) ref[n++] = c;
c = fgetc(in);
}
ref[n] = '\0';
} else {
int len = 0;
int n = 0;
while (c != EOF && c != '\n') {
n++;
if (!isspace(c)) len = n;
c = fgetc(in);
}
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
Notes:
fgetc reads a single byte from a file and returns this byte or EOF when the file has ended. In this implementation, that's the only reading function used.
Storing a reference string is implemented via fgetc here too. You could probably use fgets after skipping the initial angle bracket, too.
The counting just reads bytes without storing them. n is the total count, len is the count up to the last non-space. (Your lines probably consist only of ACGT without any trailing space, so you could skip the test for space and use n instead of len.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *my_file = fopen(argv[1], "r");
FILE *my_output = fopen(argv[2], "w");
int total_sequence_coutns = 0;
char *sequence_name;
int dna_length;
char *line = NULL;
size_t size = 0;
while(-1 != getline(&line, &size, my_file)){
if(line[0] == '>'){
sequence_name = strdup(strtok(line, ">\n"));
total_sequence_coutns +=1;
continue;
}
dna_length = strlen(strtok(line, "\n"));
fprintf(my_output, "%s %d\n", sequence_name, dna_length);
free(sequence_name);
}
fprintf(my_output, "Total number of sequences = %d\n", total_sequence_coutns);
fclose(my_file);
fclose(my_output);
free(line);
return (0);
}

Trying to simulate grep command

I am getting segmentation fault when i compile my code.
I am not getting what is wrong with my code will be happy if someone can help me.
#include<stdio.h>
#include<string.h>
int main(int argc,char *argv[])
{
FILE *fp;
char fline[100];
char *newline;
int i,count=0,occ=0;
fp=fopen(argv[1],"r");
while(fgets(fline,100,fp)!=NULL)
{
count++;
if(newline=strchr(fline,'\n'))
*newline='\0';
if(strstr(fline,argv[2])!=NULL)
{
printf("%s %d %s",argv[1],count,fline);
occ++;
}
}
printf("\n Occurence= %d",occ);
return 1;
}
See man open and man fopen:
FILE *fp;
...
fp=open(argv[1],"r");
open returns an integer, not a file pointer. Just change that line to
fp=fopen(argv[1],"r");
Note: OP edited this error out of the code in the question, for those who wonder what this is about
Which leads us to (some other minor issues addressed as well - see comments):
+EDIT: point to places where error checking should be done:
#include<stdio.h>
#include<string.h>
#include <errno.h>
int main(int argc, char *argv[]) {
FILE *fp;
char fline[100];
char *newline;
int i, count = 0, occ = 0;
// for starters, ensure that enough arguments were passed:
if (argc < 3) {
printf("Not enough command line parameters given!\n");
return 3;
}
fp = fopen(argv[1], "r");
// fopen will return if something goes wrong. In that case errno will
// contain the error code describing the problem (could be used with
// strerror to produce a user friendly error message
if (fp == NULL) {
printf("File could not be opened, found or whatever, errno is %d\n",errno);
return 3;
}
while (fgets(fline, 100, fp) != NULL) {
count++;
if (newline = strchr(fline, '\n'))
*newline = '\0';
if (strstr(fline, argv[2]) != NULL) {
// you probably want each found line on a separate line,
// so I added \n
printf("%s %d %s\n", argv[1], count, fline);
occ++;
}
}
// it's good practice to end your last print in \n
// that way at least your command prompt stars in the left column
printf("\n Occurence= %d", occ);
return 1;
}
ps: so the error occurs during runtime and not during compile time - this distinction is quite crucial, because hunting down a compiler failure and solving a library usage error require rather different techniques...

Going through a text file line by line in C

I have been working on a small exercise for my CIS class and am very confused by the methods C uses to read from a file. All that I really need to do is read through a file line by line and use the information gathered from each line to do a few manipulations. I tried using the getline method and others with no luck.
My code is currently as follows:
int main(char *argc, char* argv[]){
const char *filename = argv[0];
FILE *file = fopen(filename, "r");
char *line = NULL;
while(!feof(file)){
sscanf(line, filename, "%s");
printf("%s\n", line);
}
return 1;
}
Right now I am getting a seg fault with the sscanf method and I am not sure why. I am a total C noob and just wondering if there was some big picture thing that I was missing.
Thanks
So many problems in so few lines. I probably forget some:
argv[0] is the program name, not the first argument;
if you want to read in a variable, you have to allocate its memory
one never loops on feof, one loops on an IO function until it fails, feof then serves to determinate the reason of failure,
sscanf is there to parse a line, if you want to parse a file, use fscanf,
"%s" will stop at the first space as a format for the ?scanf family
to read a line, the standard function is fgets,
returning 1 from main means failure
So
#include <stdio.h>
int main(int argc, char* argv[])
{
char const* const fileName = argv[1]; /* should check that argc > 1 */
FILE* file = fopen(fileName, "r"); /* should check the result */
char line[256];
while (fgets(line, sizeof(line), file)) {
/* note that fgets don't strip the terminating \n, checking its
presence would allow to handle lines longer that sizeof(line) */
printf("%s", line);
}
/* may check feof here to make a difference between eof and io failure -- network
timeout for instance */
fclose(file);
return 0;
}
To read a line from a file, you should use the fgets function: It reads a string from the specified file up to either a newline character or EOF.
The use of sscanf in your code would not work at all, as you use filename as your format string for reading from line into a constant string literal %s.
The reason for SEGV is that you write into the non-allocated memory pointed to by line.
In addition to the other answers, on a recent C library (Posix 2008 compliant), you could use getline. See this answer (to a related question).
Say you're dealing with some other delimiter, such as a \t tab, instead of a \n newline.
A more general approach to delimiters is the use of getc(), which grabs one character at a time.
Note that getc() returns an int, so that we can test for equality with EOF.
Secondly, we define an array line[BUFFER_MAX_LENGTH] of type char, in order to store up to BUFFER_MAX_LENGTH-1 characters on the stack (we have to save that last character for a \0 terminator character).
Use of an array avoids the need to use malloc and free to create a character pointer of the right length on the heap.
#define BUFFER_MAX_LENGTH 1024
int main(int argc, char* argv[])
{
FILE *file = NULL;
char line[BUFFER_MAX_LENGTH];
int tempChar;
unsigned int tempCharIdx = 0U;
if (argc == 2)
file = fopen(argv[1], "r");
else {
fprintf(stderr, "error: wrong number of arguments\n"
"usage: %s textfile\n", argv[0]);
return EXIT_FAILURE;
}
if (!file) {
fprintf(stderr, "error: could not open textfile: %s\n", argv[1]);
return EXIT_FAILURE;
}
/* get a character from the file pointer */
while(tempChar = fgetc(file))
{
/* avoid buffer overflow error */
if (tempCharIdx == BUFFER_MAX_LENGTH) {
fprintf(stderr, "error: line is too long. increase BUFFER_MAX_LENGTH.\n");
return EXIT_FAILURE;
}
/* test character value */
if (tempChar == EOF) {
line[tempCharIdx] = '\0';
fprintf(stdout, "%s\n", line);
break;
}
else if (tempChar == '\n') {
line[tempCharIdx] = '\0';
tempCharIdx = 0U;
fprintf(stdout, "%s\n", line);
continue;
}
else
line[tempCharIdx++] = (char)tempChar;
}
return EXIT_SUCCESS;
}
If you must use a char *, then you can still use this code, but you strdup() the line[] array, once it is filled up with a line's worth of input. You must free this duplicated string once you're done with it, or you'll get a memory leak:
#define BUFFER_MAX_LENGTH 1024
int main(int argc, char* argv[])
{
FILE *file = NULL;
char line[BUFFER_MAX_LENGTH];
int tempChar;
unsigned int tempCharIdx = 0U;
char *dynamicLine = NULL;
if (argc == 2)
file = fopen(argv[1], "r");
else {
fprintf(stderr, "error: wrong number of arguments\n"
"usage: %s textfile\n", argv[0]);
return EXIT_FAILURE;
}
if (!file) {
fprintf(stderr, "error: could not open textfile: %s\n", argv[1]);
return EXIT_FAILURE;
}
while(tempChar = fgetc(file))
{
/* avoid buffer overflow error */
if (tempCharIdx == BUFFER_MAX_LENGTH) {
fprintf(stderr, "error: line is too long. increase BUFFER_MAX_LENGTH.\n");
return EXIT_FAILURE;
}
/* test character value */
if (tempChar == EOF) {
line[tempCharIdx] = '\0';
dynamicLine = strdup(line);
fprintf(stdout, "%s\n", dynamicLine);
free(dynamicLine);
dynamicLine = NULL;
break;
}
else if (tempChar == '\n') {
line[tempCharIdx] = '\0';
tempCharIdx = 0U;
dynamicLine = strdup(line);
fprintf(stdout, "%s\n", dynamicLine);
free(dynamicLine);
dynamicLine = NULL;
continue;
}
else
line[tempCharIdx++] = (char)tempChar;
}
return EXIT_SUCCESS;
}

Resources