Going through a text file line by line in C - c

I have been working on a small exercise for my CIS class and am very confused by the methods C uses to read from a file. All that I really need to do is read through a file line by line and use the information gathered from each line to do a few manipulations. I tried using the getline method and others with no luck.
My code is currently as follows:
int main(char *argc, char* argv[]){
const char *filename = argv[0];
FILE *file = fopen(filename, "r");
char *line = NULL;
while(!feof(file)){
sscanf(line, filename, "%s");
printf("%s\n", line);
}
return 1;
}
Right now I am getting a seg fault with the sscanf method and I am not sure why. I am a total C noob and just wondering if there was some big picture thing that I was missing.
Thanks

So many problems in so few lines. I probably forget some:
argv[0] is the program name, not the first argument;
if you want to read in a variable, you have to allocate its memory
one never loops on feof, one loops on an IO function until it fails, feof then serves to determinate the reason of failure,
sscanf is there to parse a line, if you want to parse a file, use fscanf,
"%s" will stop at the first space as a format for the ?scanf family
to read a line, the standard function is fgets,
returning 1 from main means failure
So
#include <stdio.h>
int main(int argc, char* argv[])
{
char const* const fileName = argv[1]; /* should check that argc > 1 */
FILE* file = fopen(fileName, "r"); /* should check the result */
char line[256];
while (fgets(line, sizeof(line), file)) {
/* note that fgets don't strip the terminating \n, checking its
presence would allow to handle lines longer that sizeof(line) */
printf("%s", line);
}
/* may check feof here to make a difference between eof and io failure -- network
timeout for instance */
fclose(file);
return 0;
}

To read a line from a file, you should use the fgets function: It reads a string from the specified file up to either a newline character or EOF.
The use of sscanf in your code would not work at all, as you use filename as your format string for reading from line into a constant string literal %s.
The reason for SEGV is that you write into the non-allocated memory pointed to by line.

In addition to the other answers, on a recent C library (Posix 2008 compliant), you could use getline. See this answer (to a related question).

Say you're dealing with some other delimiter, such as a \t tab, instead of a \n newline.
A more general approach to delimiters is the use of getc(), which grabs one character at a time.
Note that getc() returns an int, so that we can test for equality with EOF.
Secondly, we define an array line[BUFFER_MAX_LENGTH] of type char, in order to store up to BUFFER_MAX_LENGTH-1 characters on the stack (we have to save that last character for a \0 terminator character).
Use of an array avoids the need to use malloc and free to create a character pointer of the right length on the heap.
#define BUFFER_MAX_LENGTH 1024
int main(int argc, char* argv[])
{
FILE *file = NULL;
char line[BUFFER_MAX_LENGTH];
int tempChar;
unsigned int tempCharIdx = 0U;
if (argc == 2)
file = fopen(argv[1], "r");
else {
fprintf(stderr, "error: wrong number of arguments\n"
"usage: %s textfile\n", argv[0]);
return EXIT_FAILURE;
}
if (!file) {
fprintf(stderr, "error: could not open textfile: %s\n", argv[1]);
return EXIT_FAILURE;
}
/* get a character from the file pointer */
while(tempChar = fgetc(file))
{
/* avoid buffer overflow error */
if (tempCharIdx == BUFFER_MAX_LENGTH) {
fprintf(stderr, "error: line is too long. increase BUFFER_MAX_LENGTH.\n");
return EXIT_FAILURE;
}
/* test character value */
if (tempChar == EOF) {
line[tempCharIdx] = '\0';
fprintf(stdout, "%s\n", line);
break;
}
else if (tempChar == '\n') {
line[tempCharIdx] = '\0';
tempCharIdx = 0U;
fprintf(stdout, "%s\n", line);
continue;
}
else
line[tempCharIdx++] = (char)tempChar;
}
return EXIT_SUCCESS;
}
If you must use a char *, then you can still use this code, but you strdup() the line[] array, once it is filled up with a line's worth of input. You must free this duplicated string once you're done with it, or you'll get a memory leak:
#define BUFFER_MAX_LENGTH 1024
int main(int argc, char* argv[])
{
FILE *file = NULL;
char line[BUFFER_MAX_LENGTH];
int tempChar;
unsigned int tempCharIdx = 0U;
char *dynamicLine = NULL;
if (argc == 2)
file = fopen(argv[1], "r");
else {
fprintf(stderr, "error: wrong number of arguments\n"
"usage: %s textfile\n", argv[0]);
return EXIT_FAILURE;
}
if (!file) {
fprintf(stderr, "error: could not open textfile: %s\n", argv[1]);
return EXIT_FAILURE;
}
while(tempChar = fgetc(file))
{
/* avoid buffer overflow error */
if (tempCharIdx == BUFFER_MAX_LENGTH) {
fprintf(stderr, "error: line is too long. increase BUFFER_MAX_LENGTH.\n");
return EXIT_FAILURE;
}
/* test character value */
if (tempChar == EOF) {
line[tempCharIdx] = '\0';
dynamicLine = strdup(line);
fprintf(stdout, "%s\n", dynamicLine);
free(dynamicLine);
dynamicLine = NULL;
break;
}
else if (tempChar == '\n') {
line[tempCharIdx] = '\0';
tempCharIdx = 0U;
dynamicLine = strdup(line);
fprintf(stdout, "%s\n", dynamicLine);
free(dynamicLine);
dynamicLine = NULL;
continue;
}
else
line[tempCharIdx++] = (char)tempChar;
}
return EXIT_SUCCESS;
}

Related

How to read from a file and parse it

I have a file .txt containing some values formatted like this:
0,30,25,10
Now, I open up the file and store it into an array
char imposta_tratt[300];
FILE *fp;
fp = fopen("/home/pi/Documents/imposta_trattamento.txt", "r");
if (fp == 0) return;
fread(imposta_tratt, sizeof(imposta_tratt), 1, fp);
fclose(fp);
Now I expect to have the array filled with my data. I have the values separated by a , so I go on and parse it:
const char delim[2] = ",";
int t=0;
char *token = strtok(imposta_tratt, delim);
while (token!=NULL){
strcpy(tratt[t],token);
token = strtok(NULL, delim);
tratt[t]=token;
t++;
}
Here, referring to what's in the file .txt, I expect to have tratt[0]=0; tratt[1]=30; tratt[2]=25; and so on, but seems like I am missing something since it's not like this.
All I want is to have the values of the txt file stored in single variables. Can someone help?
What you are trying to achieve can simply be done using fgets():
bool read_file_content(const char *filename, const size_t tsizemax, int tratt[tsizemax], size_t *tsize, const char *delim)
{
// Attempt to open filename.
FILE *fp = fopen(filename, "r");
if (!fp) return false; // Return false upon failure.
// Try to read one line. If you have more, you need a while loop.
char imposta_tratt[300];
if (!fgets(imposta_tratt, sizeof imposta_tratt, fp)) {
fclose(fp);
return false;
}
*tsize = 0;
char tmp[300]; // Temporary buffer. Used for conversion into int.
char *token = strtok(imposta_tratt, delim);
while (token && *tsize < tsizemax) {
strncpy(tmp, token, sizeof tmp);
tratt[(*tsize)++] = atoi(tmp);
token = strtok(NULL, delim);
}
fclose(fp);
return true;
}
const char *filename: The file you want to parse.
const size_t tsizemax: The maximum size of your tratt array. It is important to control the size, otherwise your code will have buffer overflow (think of when your file has more than 100 tokens, for example).
int tratt[tsizemax]: The array that will hold the values.
size_t *tsize: The number of tokens read (used in combination of tsizemax).
const char *delim: The delimiter(s), in your case a ,.
This is your main():
int main(void)
{
int tratt[100];
size_t size = 0;
if (!read_file_content("in.txt", 100, tratt, &size, ",")) {
puts("Failed");
return 1;
}
for (size_t i = 0; i < size; ++i)
printf("%d\n", tratt[i]);
}
Output:
0
30
25
10
Suppose "in.txt" has contents
0,30,25,10
The below program uses fscanf to read the integers into the tratt array, one-by-one. As we read integers using fscanf, we make sure it's return value is as expected. If not, we close the file and exit. In the event that the return value of fscanf is not as expected, the program also prints which type of error occurred. Currently, if any error occurs, the program stops. However, you can make the program behave differently depending on the error that occurred if you like.
As output, the program prints all of the integers read into the tratt array. The output is
0
30
25
10
Now this program assumes we know the number of elements we want to read into tratt. If we do not, we could allow for dynamically allocating more memory should the array need more elements or perhaps "in.txt" could contain a data structure, say, at the beginning/end of the file that records information about the file, such as the number of numbers in the file and the data type (a binary file would be best suited for this). These are just a couple of the possibilities.
A better approach might be to read characters in one-by-one (say, using getc) and use strtol to convert a sequence of character digits to a long int (I would have taken an approach similar to this).
Nevertheless, this approach is more succinct and should suffice.
#include <stdio.h>
#include <stdlib.h>
#define FILE_NAME "in.txt"
#define MAX_LEN 4
int main(void) {
int i, tratt[MAX_LEN];
FILE *fp = fopen(FILE_NAME, "r"); /* open file for reading */
/* if cannot open file */
if (fp == NULL) {
printf("Cannot open %s\n", FILE_NAME);
exit(EXIT_FAILURE);
}
/* read integer, checking return value of scanf as expected */
if (fscanf(fp, "%d", &tratt[0]) != 1) {
if (ferror(fp))
printf("fscanf: read error\n");
else if (feof(fp))
printf("fscanf: end of file\n");
else
printf("fscanf: matching failure\n");
fclose(fp);
exit(EXIT_FAILURE);
}
for (i = 1; i < MAX_LEN; i++)
/* read comma plus integer, checking return value of scanf */
if (fscanf(fp, ",%d", &tratt[i]) != 1) {
if (ferror(fp))
printf("fscanf: read error\n");
else if (feof(fp))
printf("fscanf: end of file\n");
else
printf("fscanf: matching failure\n");
fclose(fp);
exit(EXIT_FAILURE);
}
fclose(fp); /* close file */
/* print integers stored in tratt */
for (i = 0; i < MAX_LEN; i++)
printf("%d\n", tratt[i]);
return 0;
}

Segmentation fault (core dumped) due to fgets - I think

but I keep getting this error when I run this program. I think it's because of the fgets function. I tried initializing the input variable to NULL to see if that'll help, but it didn't. I also have a hunch that I might need to malloc to solve the problem. But your help is highly appreciated.
int main(int argc, char* argv[])
{
char* input = NULL;
// ensure one and only one command line argument
if (argc != 2)
{
printf("Usage: %s [name of document]\n", argv[0]);
return 1;
}
// open a new document for writing
FILE* fp = fopen(argv[1], "w");
// check for successful open
if(fp == NULL)
{
printf("Could not create %s\n", argv[1]);
return 2;
}
// get text from user and save to file
while(true)
{
// get text from user
printf("Enter a new line of text (or \"quit\"):\n");
fgets(input, 50, stdin);
// if user wants to quit
if (input != NULL && strcmp(input, "quit") == 0)
{
free(input);
break;
}
// if user wants to enter text
else if (input != NULL)
{
fputs(input, fp);
fputs("\n", fp);
printf("CHA-CHING!\n\n");
free(input);
}
}
// close the file and end successfuly
fclose(fp);
return 0;
}
You never malloc-ed input, so yeah, fgets is dereferencing the NULL pointer as its buffer, and that's going to die. Either change input to a stack array (and remove the free for it) or actually call malloc to allocate memory so input isn't pointing to NULL.
Their are some problems in your code.
You have not allocated memory to input character pointer. Hence you can't store characters in it, hence you get segmentation fault.
Also you are freeing more than once, which is incorrect.
So, a code, with the above modification would be something like this:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
int main(int argc, char* argv[])
{
char* input = malloc(sizeof(char) * 50);
// ensure one and only one command line argument
if (argc != 2)
{
printf("Usage: %s [name of document]\n", argv[0]);
return 1;
}
// open a new document for writing
FILE* fp = fopen(argv[1], "w");
// check for successful open
if(fp == NULL)
{
printf("Could not create %s\n", argv[1]);
return 2;
}
// get text from user and save to file
while(1)
{
// get text from user
printf("Enter a new line of text (or \"quit\"):\n");
fgets(input, 50, stdin);
// if user wants to quit
if (input != NULL && strcmp(input, "quit\n") == 0)
{
free(input);
break;
}
// if user wants to enter text
else if (input != NULL)
{
fputs(input, fp);
fputs("\n", fp);
printf("CHA-CHING!\n\n");
// free(input);
}
}
// close the file and end successfuly
fclose(fp);
return 0;
}
Hope it helps your problem.
Cheers.
While you can use malloc() here, it is not really necessary. You can #define a reasonable maximum line length, and declare a character array to hold the input. If you do this, you can remove the frees from your code.
You also have an issue with the way that you are using fgets(). The trailing \n is kept by fgets(), but your comparisons are ignoring this. Consequently, input is never equal to "quit", and is certainly never NULL. I have included some code that removes the trailing newline after reading into input; the code also clears any remaining characters from the input stream, which is possible in the event that the user enters more than MAXLINE - 1 characters. The test for text input is then simply if (input[0]). Alternatively, you could change your tests to take into account the extra '\n' character.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#define MAXLINE 1000
int main(int argc, char* argv[])
{
char input[MAXLINE];
char *ch; // used to remove newline
char c; // used to clear input stream
// ensure one and only one command line argument
if (argc != 2)
{
printf("Usage: %s [name of document]\n", argv[0]);
return 1;
}
// open a new document for writing
FILE* fp = fopen(argv[1], "w");
// check for successful open
if(fp == NULL)
{
printf("Could not create %s\n", argv[1]);
return 2;
}
// get text from user and save to file
while(true)
{
// get text from user
printf("Enter a new line of text (or \"quit\"):\n");
fgets(input, MAXLINE, stdin);
// remove trailing newline
ch = input;
while (*ch != '\n' && *ch != '\0') {
++ch;
}
if (*ch) {
*ch = '\0';
} else { // remove any extra characters in input stream
while ((c = getchar()) != '\n' && c != EOF)
continue;
}
// if user wants to quit
if (strcmp(input, "quit") == 0)
{
break;
}
// if user wants to enter text
else if (input[0])
{
fputs(input, fp);
fputs("\n", fp);
printf("CHA-CHING!\n\n");
}
}
// close the file and end successfuly
fclose(fp);
return 0;
}
I think it's because of the fgets function.
Yes: passing NULL pointer to fgets makes no sense, isn't allowed, and will cause a crash.
I might need to malloc to solve the problem.
You need to pass a pointer to a suitable buffer for fgets to read input into. Whether that buffer is malloced, a local or a global array, is irrelevant.
TL;DR: think about what you are doing.

Check if all char values are present in string

I'm currently working on this assignment and I'm stuck. The objective is to read a file and find if these char values exist in the String from the file. I have to compare a String from a file to another String I put in as an argument. However, just as long as each char value is in the String from the file then it "matches".
Example (input and output):
./a.out file1 done
done is in bonehead
done is not in doggie
Example (file1):
bonehead
doggie
As you can see the order in which is compares Strings does not matter and the file also follows one word per line. I've put together a program that finds if the char value is present in the other String but that is only part of the problem. Any idea how to go about this?
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char **argv){
FILE *f = fopen(argv[1], "r");
char *line = NULL;
size_t len = 0;
ssize_t read;
char *word = argv[2];
if(argc != 3){
printf("./a.out <file> <word>\n");
exit(EXIT_SUCCESS);
}
if(f == NULL){
printf("file empty\n");
exit(EXIT_SUCCESS);
}
// confused what this loop does too
while((read = getline(&line, &len, f)) != -1){
char *c = line;
while(*c){
if(strchr(word, *c))
printf("can't spell \"%s\" without \"%s\"!\n", line, word);
else
printf("no \"%s\" in \"%s\".\n", word, line);
c++;
}
}
fclose(f);
exit(EXIT_SUCCESS);
}
Another approach would simply keep a sum of each character matched in the line read from the file, adding one for each unique character in the word supplied to test, and if the sum is equal to the length of the string made up by the unique characters is the search term, then each of the unique characters in the search term are included in the line read from the file.
#include <stdio.h>
#include <string.h>
#define MAXC 256
int main (int argc, char **argv) {
if (argc < 3 ) { /* validate required arguments */
fprintf (stderr, "error: insufficient input, usage: %s file string\n",
argv[0]);
return 1;
}
FILE *fp = fopen (argv[1], "r");
char line[MAXC] = "";
char *s = argv[2]; /* string holding search string */
size_t slen = strlen(s), sum = 0, ulen;
char uniq[slen+1]; /* unique characters in s */
if (!fp) { /* validate file open */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
memset (uniq, 0, slen+1); /* zero the VLA */
/* fill uniq with unique characters from s */
for (; *s; s++) if (!strchr (uniq, *s)) uniq[sum++] = *s;
ulen = strlen (uniq);
s = argv[2]; /* reset s */
while (fgets (line, MAXC, fp)) { /* for each line in file */
if (strlen (line) - 1 < ulen) { /* short line, continue */
printf ("%s is not in %s", s, line);
continue;
}
char *up = uniq; /* ptr to uniq */
sum = 0; /* reset sum */
while (*up) if (strchr (line, *up++)) sum++; /* count chars */
if (sum < ulen) /* validate sum */
printf ("%s is not in %s", s, line);
else
printf ("%s is in %s", s, line);
}
fclose (fp); /* close file */
return 0;
}
Example Use/Output
$ ./bin/strallcinc dat/words.txt done
done is in bonehead
done is not in doggie
which would work equally well for duplicate characters in the search string. e.g.
$ ./bin/strallcinc dat/words.txt doneddd
doneddd is in bonehead
doneddd is not in doggie
You can decide if you would handle duplicate characters differently, but you should make some determination on how that contingency will be addressed.
Let me know if you have any questions.
confused what this loop does
The while (read ... line obviously reads in lines from your file, placing them in the line variable
*c is a pointer to the start of the variable line and this pointer is incremented by c++, so that each letter in the word from the file is accessed. The while loop will be terminated when *c points to the null terminator (0).
The if (strchr(word ... line is testing if the test word contains one of the letters from the word in the file.
This seems to be the reverse of what you are trying to do - finding if all the letters in the test word can be found in the word from the file.
The printf lines are not sensible because there is no either/or - you need one line to print 'yes' our letters are present and one line to print 'no' at least one letter is not present.
The printf statements should be outside the comparison loop, so that you don't get multiple lines of output for each word. Add a flag to show if any letter does not exist in the word. Set flag to 1 at start, and only change it to 0 when a letter is not present, then use the flag to print one of the two outcome statements.
This code snippet may help
/* set flag to 'letters all present' */
int flag = 1;
/* set pointer c to start of input line */
c = word;
/* test word from file for each letter in test word */
while(*c) {
if(strchr(line, *c) == NULL) {
/* set flag to letter not present */
flag = 0;
break;
}
c++;
}

Get the length of each line in file with C and write in output file

I am a biology student and I am trying to learn perl, python and C and also use the scripts in my work. So, I have a file as follows:
>sequence1
ATCGATCGATCG
>sequence2
AAAATTTT
>sequence3
CCCCGGGG
The output should look like this, that is the name of each sequence and the count of characters in each line and printing the total number of sequences in the end of the file.
sequence1 12
sequence2 8
sequence3 8
Total number of sequences = 3
I could make the perl and python scripts work, this is the python script as an example:
#!/usr/bin/python
import sys
my_file = open(sys.argv[1]) #open the file
my_output = open(sys.argv[2], "w") #open output file
total_sequence_counts = 0
for line in my_file:
if line.startswith(">"):
sequence_name = line.rstrip('\n').replace(">","")
total_sequence_counts += 1
continue
dna_length = len(line.rstrip('\n'))
my_output.write(sequence_name + " " + str(dna_length) + '\n')
my_output.write("Total number of sequences = " + str(total_sequence_counts) + '\n')
Now, I want to write the same script in C, this is what I have achieved so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
input = FILE *fopen(const char *filename, "r");
output = FILE *fopen(const char *filename, "w");
double total_sequence_counts = 0;
char sequence_name[];
char line [4095]; // set a temporary line length
char buffer = (char *) malloc (sizeof(line) +1); // allocate some memory
while (fgets(line, sizeof(line), filename) != NULL) { // read until new line character is not found in line
buffer = realloc(*buffer, strlen(line) + strlen(buffer) + 1); // realloc buffer to adjust buffer size
if (buffer == NULL) { // print error message if memory allocation fails
printf("\n Memory error");
return 0;
}
if (line[0] == ">") {
sequence_name = strcpy(sequence_name, &line[1]);
total_sequence_counts += 1
}
else {
double length = strlen(line);
fprintf(output, "%s \t %ld", sequence_name, length);
}
fprintf(output, "%s \t %ld", "Total number of sequences = ", total_sequence_counts);
}
int fclose(FILE *input); // when you are done working with a file, you should close it using this function.
return 0;
int fclose(FILE *output);
return 0;
}
But this code, of course is full of mistakes, my problem is that despite studying a lot, I still can't properly understand and use the memory allocation and pointers so I know I especially have mistakes in that part. It would be great if you could comment on my code and see how it can turn into a script that actually work. By the way, in my actual data, the length of each line is not defined so I need to use malloc and realloc for that purpose.
For a simple program like this, where you look at short lines one at a time, you shouldn't worry about dynamic memory allocation. It is probably good enough to use local buffers of a reasonable size.
Another thing is that C isn't particularly suited for quick-and-dirty string processing. For example, there isn't a strstrip function in the standard library. You usually end up implementing such behaviour yourself.
An example implementation looks like this:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
char line[MAXLEN]; /* Current line buffer */
char ref[MAXLEN] = ""; /* Sequence reference buffer */
int nseq = 0; /* Sequence counter */
if (argc != 3) {
fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
exit(1);
}
in = fopen(argv[1], "r");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s.\n", argv[1]);
exit(1);
}
out = fopen(argv[2], "w");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s for writing.\n", argv[2]);
exit(1);
}
while (fgets(line, sizeof(line), in)) {
int len = strlen(line);
/* Strip whitespace from end */
while (len > 0 && isspace(line[len - 1])) len--;
line[len] = '\0';
if (line[0] == '>') {
/* First char is '>': copy from second char in line */
strcpy(ref, line + 1);
} else {
/* Other lines are sequences */
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
A lot of code is about enforcing arguments and opening and closing files. (You could cut out a lot of code if you used stdin and stdout with file redirections.)
The core is the big while loop. Things to note:
fgets returns NULL on error or when the end of file is reached.
The first lines determine the length of the line and then remove white-space from the end.
It is not enough to decrement length, at the end the stripped string must be terminated with the null character '\0'
When you check the first character in the line, you should check against a char, not a string. In C, single and double quotes are not interchangeable. ">" is a string literal of two characters, '>' and the terminating '\0'.
When dealing with countable entities like chars in a string, use integer types, not floating-point numbers. (I've used (signed) int here, but because there can't be a negative number of chars in a line, it might have been better to have used an unsigned type.)
The notation line + 1 is equivalent to &line[1].
The code I've shown doesn't check that there is always one reference per sequence. I'll leave this as exercide to the reader.
For a beginner, this can be quite a lot to keep track of. For small text-processing tasks like yours, Python and Perl are definitely better suited.
Edit: The solution above won't work for long sequences; it is restricted to MAXLEN characters. But you don't need dynamic allocation if you only need the length, not the contents of the sequences.
Here's an updated version that doesn't read lines, but read characters instead. In '>' context, it stored the reference. Otherwise it just keeps a count:
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h> /* for isspace() */
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
int nseq = 0; /* Sequence counter */
char ref[MAXLEN]; /* Reference name */
in = fopen(argv[1], "r");
out = fopen(argv[2], "w");
/* Snip: Argument and file checking as above */
while (1) {
int c = getc(in);
if (c == EOF) break;
if (c == '>') {
int n = 0;
c = fgetc(in);
while (c != EOF && c != '\n') {
if (n < sizeof(ref) - 1) ref[n++] = c;
c = fgetc(in);
}
ref[n] = '\0';
} else {
int len = 0;
int n = 0;
while (c != EOF && c != '\n') {
n++;
if (!isspace(c)) len = n;
c = fgetc(in);
}
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
Notes:
fgetc reads a single byte from a file and returns this byte or EOF when the file has ended. In this implementation, that's the only reading function used.
Storing a reference string is implemented via fgetc here too. You could probably use fgets after skipping the initial angle bracket, too.
The counting just reads bytes without storing them. n is the total count, len is the count up to the last non-space. (Your lines probably consist only of ACGT without any trailing space, so you could skip the test for space and use n instead of len.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *my_file = fopen(argv[1], "r");
FILE *my_output = fopen(argv[2], "w");
int total_sequence_coutns = 0;
char *sequence_name;
int dna_length;
char *line = NULL;
size_t size = 0;
while(-1 != getline(&line, &size, my_file)){
if(line[0] == '>'){
sequence_name = strdup(strtok(line, ">\n"));
total_sequence_coutns +=1;
continue;
}
dna_length = strlen(strtok(line, "\n"));
fprintf(my_output, "%s %d\n", sequence_name, dna_length);
free(sequence_name);
}
fprintf(my_output, "Total number of sequences = %d\n", total_sequence_coutns);
fclose(my_file);
fclose(my_output);
free(line);
return (0);
}

Printing out each line of an input file twice?

I am writing code which very simply reads in a file and prints out what was in the file appropriately.
I have always struggled with getting such a program to terminate upon end of file and think I've found the appropriate solution, however each line is printing twice in my output, for a reason beyond me.
Here is my main file:
int main(int argc, char *argv[]) {
// insure 2 arguments given, one for a.out and one for the test file
if (argc != 2) {
// result if request fails
printf("Requires 2 arguments. Be sure to include test file location\n");
return 0;
}
FILE *fp; //open the file
fp = fopen(argv[1], "r");
char option;
int key;
int i = 0;
while (fscanf(fp, "%c %d", &option, &key) != EOF) {
printf("%d\n", key);
}
}
The key is printing twice!
Hopefully this is a simple error I'm just overlooking due to overexposure to the problem.
You probably want:
fscanf(fp, "%c %d\n", &option, &key);
And you also want to check the return value of fscanf to make sure it equals 2.
In the first iteration of your loop, the newline is not being consumed.
In the second iteration, the newline is consumed and put in option, and the %d does not match, and fscanf returns 1. key is unchanged which is why it gets printed again.
In the third iteration, fscanf finally returns EOF.
General rule: Always check return values to ensure they are what you expect. (You also violate this rule by failing to check the return from fopen.) At worst it does nothing; at best, it helps you debug problems like this.
#include <stdio.h>
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr, "Requires 1 argument - a file name\n");
return 1;
}
FILE *fp; //open the file
if ((fp = fopen(argv[1], "r")) == 0)
{
fprintf(stderr, "Failed to open file %s\n", argv[1]);
return 1;
}
char option;
int key;
while (fscanf(fp, "%c %d", &option, &key) == 2)
printf("%d\n", key);
return 0;
}
Note the changes in error reporting, and in the file reading process. The code is still probably not quite what you want; you might get the newline after the number after the first line of input stored in option after the first line. Fixing that requires fgets() and sscanf():
#include <stdio.h>
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr, "Requires 1 argument - a file name\n");
return 1;
}
FILE *fp; //open the file
if ((fp = fopen(argv[1], "r")) == 0)
{
fprintf(stderr, "Failed to open file %s\n", argv[1]);
return 1;
}
char buffer[1024];
while (fgets(buffer, sizeof(buffer), fp) != 0)
{
char option;
int key;
if (fscanf(fp, "%c %d", &option, &key) == 2)
printf("%d\n", key);
else
{
fprintf(stderr, "Format mismatch on %s", buffer);
fclose(fp); // Not 100% necessary here, but tidiness is important
return 1;
}
}
fclose(fp); // Not 100% necessary here, but tidiness is important.
return 0;
}
Although I closed fp before the end, it is not crucial when the program is about to exit, and return from main() is pretty much equivalent to exit(). If it was in a function other than main() though, it is very important to ensure that you free any resource you allocate, such as the file stream fp.
Warning: uncompiled code. Caveat Lector.

Resources