is there any way to store data in an array from a file using getc() function, not fscanf()?
For example, there is a file "file.txt" containing following data:
Name
Surname
Age
So the content of the array arr[] would be:
arr[] = {Name, Surname, Age}
So far I could only print out the content of the file to the console:
#include <stdio.h>
int main(void)
{
setvbuf(stdout, NULL, _IONBF, 0);
FILE *file = fopen("file.txt", "r");
int ch;
while (((ch = getc(file)) != EOF) )
{
putc(ch, stdout);
}
return 0;
}
It was a requirement of the assignment that libraries other than stdio.h cannot be used. I can easily do it with fscanf() but getc() function creates some not some difficulties.
is there any way to store data in an array from a file using getc() function, not fscanf()?
Yes. By reading and assigning values to an element of an array, and increasing the index for each character. Repeat this until a newline character is found, or the file is exhausted.
Make sure to leave room for the null-terminating byte (\0') in order to create a string, by placing one after the last character read.
A quick alteration to your example:
#include <stdio.h>
#define FILENAME "file.txt"
#define MAXSIZE 128
int main(void)
{
setvbuf(stdout, NULL, _IONBF, 0);
FILE *file = fopen(FILENAME, "r");
if (!file) {
perror(FILENAME);
return 1;
}
int ch;
size_t i = 0;
char array[MAXSIZE];
while (i < MAXSIZE - 1 && ((ch = getc(file)) != EOF)) {
if (ch == '\n')
break;
array[i++] = ch;
}
/* null-terminate the array to create a string */
array[i] = '\0';
fclose(file);
puts(array);
}
If you extract this logic into its own function, that accepts an array to fill, a character limit, and the file to read from, and you will have largely recreated fgets.
Related
I'm new to programming in C. And I'm trying to print the first 10 lines of a text file. When I run my program with a text file containing 11 lines of text, only the first line is displayed. I'm not sure why it does that, but I suspect there is something wrong in my while loop. Can someone please help me?
#include <stdio.h>
int main(int argc, char *argv[]){
FILE *myfile;
char content;
int max = 0;
// Open file
myfile = fopen(argv[1], "r");
if (myfile == NULL){
printf("Cannot open file \n");
exit(0);
}
// Read the first 10 lines from file
content = fgetc(myfile);
while (content != EOF){
max++;
if (max > 10)
break;
printf ("%c", content);
content = fgetc(myfile);
}
fclose(myfile);
return 0;
}
You have been already advised to use fgets. However, if your file has lines of unknown length, you may still want to use fgetc. Just make sure you count only newlines, not all characters:
int max = 0;
int content;
while ((content = fgetc(myfile)) != EOF && max < 10){
if (content == '\n') max++;
putchar(content);
}
fgetc() returns the next character in the file, not the next line. You probably want to use fgets() instead, which reads up to the next newline character into a buffer. Your code should probably end up with something like:
// allocate 1K for a buffer to read
char *buff = malloc(1024);
// iterate through file until we are out of data or we read 10 lines
while(fgets(buff, 1024, myfile) != NULL && max++ < 10) {
printf("%s\n", buff);
}
free(buff);
// close your file, finish up...
Read more about fgets() here: https://www.tutorialspoint.com/c_standard_library/c_function_fgets.htm
fgetc function reads the next character not the next ine. for reading the number of lines you should use fgets function. this function reads the full string till the end of the one line and stores it in a string.
your code Shuld be as:-
#include <stdio.h>
int main(int argc, char *argv[])
{
FILE *myfile;
char content[200];
int max = 0;
// Open file
myfile = fopen(argv[1], "r");
if (myfile == NULL)
{
printf("Cannot open file \n");
exit(0);
}
// Read the first 10 lines from file
fgets(content, 200, myfile);
while (content != EOF)
{
max++;
if (max > 10)
break;
printf("%s", content);
fgets(content, 200, myfile);
}
fclose(myfile);
return 0;
}
I have a list of columns containing text but I just to fetch first upper row from this list. How to do that?
#include <stdio.h>
int main()
{
FILE *fr;
char c;
fr = fopen("prog.txt", "r");
while( c != EOF)
{
c = fgetc(fr); /* read from file*/
printf("%c",c); /* display on screen*/
}
fclose(fr);
return 0;
}
Your stop condition is EOF, everything will be read to the end of the file, what you need is to read till newline character is found, furthermore EOF (-1) should be compared with int type.
You'll need something like:
#include <stdio.h>
#include <stdlib.h>
int main()
{
FILE *fr;
int c;
if(!(fr = fopen("prog.txt", "r"))){ //check file opening
perror("File error");
return EXIT_FAILURE;
}
while ((c = fgetc(fr)) != EOF && c != '\n')
{
printf("%c",c); /* display on screen*/
}
fclose(fr);
return EXIT_SUCCESS;
}
This is respecting your code reading the line char by char, you also have the library functions that allow you to read whole line, like fgets() for a portable piece of code, or getline() if you are not on Windows, alternatively download a portable version, and, of course you can make your own like this one or this one.
For whatever it's worth, here's an example that uses getline
#include <stdio.h>
int main()
{
FILE *fr;
char *line = NULL;
size_t len = 0;
ssize_t nread;
if (!(fr = fopen("prog.txt", "r"))) {
perror("Unable to open file");
return 1;
}
nread = getline(&line, &len, fr);
printf("line: %s, nread: %ld\n", line, nread);
fclose(fr);
return 0;
}
Some notes:
getline() can automatically allocate your read buffer, if you wish.
getline() returns the end of line delimiter. You can always strip it off, if you don't want it.
It's ALWAYS a good idea to check the status of I/O calls like "fopen()".
just replace EOF as '\n'(new line char). Than your code will read until reaching the new line. Here is what it looks like:
#include <stdio.h>
int main()
{
FILE *fr;
char c = ' ';
fr = fopen("prog.txt", "r");
while(c != EOF && c != '\n')
{
c = fgetc(fr); /* read from file*/
if(c != EOF){
printf("%c",c); /* display on screen*/
}
}
fclose(fr);
return 0;
}
I have not tested it yet but probably work. Please let me know if there is some problem with the code i will edit it.
Edit1:char c; in line 5 is initialized as ' ' for dealing with UB.
Edit2:adding condition (c != EOF) to while loop in line 7, for not giving reason to infinite loop.
Edit3:adding if statement to line 10 for not printing EOF which can be reason for odd results.
I am trying to open a text file inputted by the user and read this text file but print the text file 60 characters at a time so I think in order for me to do this I need to store the text into an array and if it is over 60 characters on a line it should start on a new line. However, when I run the code below an error message shows up saying : C^#
#include <stdio.h>
#include <stdlib.h>
int main()
{
char arr[];
arr[count] = '\0';
char ch, file_name[25];
FILE *fp;
printf("Enter file name: \n");
gets(file_name);
fp = fopen(file_name,"r"); // reading the file
if( fp == NULL )
{
perror("This file does not exist\n"); //if file cannot be found print error message
exit(EXIT_FAILURE);
}
printf("The contents of %s file are :\n", file_name);
while( ( ch = fgetc(fp) ) != EOF ){
arr[count] = ch;
count++;
printf("%s", arr);}
fclose(fp);
return 0;
}
char arr[]; is invalid.you need to specify a size.
array[count] = '\0'; : count is uninitialized.
gets(file_name); : gets is deprecated and dangerous.use another function like scanf.
Try the following code :
#include <stdio.h>
#include <stdlib.h>
int main()
{
int ch , count = 0;
char file_name[25];
FILE *fp;
printf("Enter file name: \n");
scanf(" %24s",file_name);
fp = fopen(file_name,"r"); // reading the file
if( fp == NULL )
{
perror("This file does not exist\n"); //if file cannot be found print error message
exit(EXIT_FAILURE);
}
fseek(fp, 0L, SEEK_END);
long sz = ftell(fp);
fseek(fp, 0L, SEEK_SET);
char arr[sz];
while( ( ch = fgetc(fp) ) != EOF )
{
if( count < sz )
{
arr[count] = ch;
count++;
}
}
arr[sz] = '\0';
printf("The contents of %s file are :\n", file_name);
printf("arr : %s\n",arr);
fclose(fp);
return 0;
}
fgetc always reads the next character until EOF. use fgets() instead:
char *fgets(char *s, int size, FILE *stream)
fgets() reads in at most one less than size characters from stream and
stores them into the buffer pointed to by s. Reading stops after an EOF
or a newline. If a newline is read, it is stored into the buffer. A
terminating null byte (aq\0aq) is stored after the last character in the
buffer.
1) your while loop is not properly delimited. In the absence of a { } block, the instruction arr[count] = ch; is the only repeted one.
I suppose it should include the incrementation of count too
while( ( ch = fgetc(fp) ) != EOF )
{
arr[count] = ch;
count++;
....
}
among other things (testing the counter etc).
2) there's no imperative need to read and store in an array. It is perfectly possible to transfer each character as soon as it is read, and add a line break when needed (new line, limit of 60 exceeded).
Three problems:
The variable count is not initialized, so it's value is indeterminate and using it will lead to undefined behavior.
The call printf(arr) treats arr as a string but arr is not terminated which again leads to undefined behavior.
The increment of count is outside the loop.
To solve the two first problems you must first initialize count to zero, then you must terminate the string after the loop:
arr[count] = '\0';
However, your printf(arr) call is still very problematic, what if the user enters some printf formatting codes, what will happen then? That's why you should never call printf with a user-provided input string, instead simply do
printf("%s", arr);
You also have a very big problem if the contents of the file you read is longer than 59 characters, and then you will overflow the array.
I was looking for a solution on how to read char by char on each line from a txt file and I found one, but I don't get some parts of the code. This is it:
#include <stdio.h>
#include <stdlib.h>
void handle_line(char *line) {
printf("%s", line);
}
int main(int argc, char *argv[]) {
int size = 1024, pos;
int c;
char *buffer = (char *)malloc(size);
FILE *f = fopen("myfile.txt", "r");
if(f) {
do { // read all lines in file
pos = 0;
do{ // read one line
c = fgetc(f);
if(c != EOF) buffer[pos++] = (char)c;
if(pos >= size - 1) { // increase buffer length - leave room for 0
size *=2;
buffer = (char*)realloc(buffer, size);
}
}while(c != EOF && c != '\n');
buffer[pos] = 0;
// line is now in buffer
handle_line(buffer);
} while(c != EOF);
fclose(f);
}
free(buffer);
return 0;
}
It was written by someone from here, but I can't reply 'cause I need more points lol. The parts I don't understand are:
if(c != EOF) buffer[pos++] = (char)c;
What does buffer[pos++] do? does it actually increase the variable "pos"? also, why does it start at 1 instead of 0? (pos starts at 0).
I can't really get track of the variable "pos", and I don't know why here buffer[pos] is 0:
buffer[pos] = 0;
The way I read the code is:
declare the size of the buffer that contains every char of every line (I mean, buffer is just free'd at the end, so it keeps the information on every line right?), then declare the other variables and alloc the memory of the buffer.
Open the file myfile.txt, and if it's not null, make pos = 0, then make "c" to store the first character of the file (now the function points to the next char), then if c != EOF meaning the end of file is not reached, save the character "c" in the position 1 of the buffer (I get confused here, why 1 and not 0). Then realloc twice as memory as before if needed. Do that for every character in the line untile you reach EOF or a \n. Now make buffer[pos] = 0, I dont know what value "pos" has, and I assume he makes buffer[pos] = 0 to indicate the end of the line? idk. Print the line, do that until you reach the end of the file. Close the file, free the memory on buffer.
Help! thanks.
fgetc(fp) - Reads the next character from the specified input stream (fp) and advances the associated file position indicator (you do not need to). If successful, the function will return the character read; otherwise, the value EOF (-1) is returned.
Here is a very simple example of using fgetc() to read each character of a file (and write it to another file using fputc())
char filename1[]={"c:\\play\\_in.txt"};//change paths as needed
char filename2[]={"c:\\play\\_out.txt"};
int main(void)
{
FILE *fp1 = {0};
FILE *fp2 = {0};
int c=0;
fp1 = fopen(filename1, "r");
if(fp1)
{
fp2 = fopen (filename2, "w");
if(fp2)
{
c = fgetc(fp1);
while(c != EOF)
{
fputc(c, fp2);
c = fgetc(fp1);
}
fclose(fp2);
}
fclose(fp1);
}
return 0;
}
I am a biology student and I am trying to learn perl, python and C and also use the scripts in my work. So, I have a file as follows:
>sequence1
ATCGATCGATCG
>sequence2
AAAATTTT
>sequence3
CCCCGGGG
The output should look like this, that is the name of each sequence and the count of characters in each line and printing the total number of sequences in the end of the file.
sequence1 12
sequence2 8
sequence3 8
Total number of sequences = 3
I could make the perl and python scripts work, this is the python script as an example:
#!/usr/bin/python
import sys
my_file = open(sys.argv[1]) #open the file
my_output = open(sys.argv[2], "w") #open output file
total_sequence_counts = 0
for line in my_file:
if line.startswith(">"):
sequence_name = line.rstrip('\n').replace(">","")
total_sequence_counts += 1
continue
dna_length = len(line.rstrip('\n'))
my_output.write(sequence_name + " " + str(dna_length) + '\n')
my_output.write("Total number of sequences = " + str(total_sequence_counts) + '\n')
Now, I want to write the same script in C, this is what I have achieved so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
input = FILE *fopen(const char *filename, "r");
output = FILE *fopen(const char *filename, "w");
double total_sequence_counts = 0;
char sequence_name[];
char line [4095]; // set a temporary line length
char buffer = (char *) malloc (sizeof(line) +1); // allocate some memory
while (fgets(line, sizeof(line), filename) != NULL) { // read until new line character is not found in line
buffer = realloc(*buffer, strlen(line) + strlen(buffer) + 1); // realloc buffer to adjust buffer size
if (buffer == NULL) { // print error message if memory allocation fails
printf("\n Memory error");
return 0;
}
if (line[0] == ">") {
sequence_name = strcpy(sequence_name, &line[1]);
total_sequence_counts += 1
}
else {
double length = strlen(line);
fprintf(output, "%s \t %ld", sequence_name, length);
}
fprintf(output, "%s \t %ld", "Total number of sequences = ", total_sequence_counts);
}
int fclose(FILE *input); // when you are done working with a file, you should close it using this function.
return 0;
int fclose(FILE *output);
return 0;
}
But this code, of course is full of mistakes, my problem is that despite studying a lot, I still can't properly understand and use the memory allocation and pointers so I know I especially have mistakes in that part. It would be great if you could comment on my code and see how it can turn into a script that actually work. By the way, in my actual data, the length of each line is not defined so I need to use malloc and realloc for that purpose.
For a simple program like this, where you look at short lines one at a time, you shouldn't worry about dynamic memory allocation. It is probably good enough to use local buffers of a reasonable size.
Another thing is that C isn't particularly suited for quick-and-dirty string processing. For example, there isn't a strstrip function in the standard library. You usually end up implementing such behaviour yourself.
An example implementation looks like this:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
char line[MAXLEN]; /* Current line buffer */
char ref[MAXLEN] = ""; /* Sequence reference buffer */
int nseq = 0; /* Sequence counter */
if (argc != 3) {
fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
exit(1);
}
in = fopen(argv[1], "r");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s.\n", argv[1]);
exit(1);
}
out = fopen(argv[2], "w");
if (in == NULL) {
fprintf(stderr, "Couldn't open %s for writing.\n", argv[2]);
exit(1);
}
while (fgets(line, sizeof(line), in)) {
int len = strlen(line);
/* Strip whitespace from end */
while (len > 0 && isspace(line[len - 1])) len--;
line[len] = '\0';
if (line[0] == '>') {
/* First char is '>': copy from second char in line */
strcpy(ref, line + 1);
} else {
/* Other lines are sequences */
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
A lot of code is about enforcing arguments and opening and closing files. (You could cut out a lot of code if you used stdin and stdout with file redirections.)
The core is the big while loop. Things to note:
fgets returns NULL on error or when the end of file is reached.
The first lines determine the length of the line and then remove white-space from the end.
It is not enough to decrement length, at the end the stripped string must be terminated with the null character '\0'
When you check the first character in the line, you should check against a char, not a string. In C, single and double quotes are not interchangeable. ">" is a string literal of two characters, '>' and the terminating '\0'.
When dealing with countable entities like chars in a string, use integer types, not floating-point numbers. (I've used (signed) int here, but because there can't be a negative number of chars in a line, it might have been better to have used an unsigned type.)
The notation line + 1 is equivalent to &line[1].
The code I've shown doesn't check that there is always one reference per sequence. I'll leave this as exercide to the reader.
For a beginner, this can be quite a lot to keep track of. For small text-processing tasks like yours, Python and Perl are definitely better suited.
Edit: The solution above won't work for long sequences; it is restricted to MAXLEN characters. But you don't need dynamic allocation if you only need the length, not the contents of the sequences.
Here's an updated version that doesn't read lines, but read characters instead. In '>' context, it stored the reference. Otherwise it just keeps a count:
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h> /* for isspace() */
#define MAXLEN 80 /* Maximum line length, including null terminator */
int main(int argc, char *argv[])
{
FILE *in;
FILE *out;
int nseq = 0; /* Sequence counter */
char ref[MAXLEN]; /* Reference name */
in = fopen(argv[1], "r");
out = fopen(argv[2], "w");
/* Snip: Argument and file checking as above */
while (1) {
int c = getc(in);
if (c == EOF) break;
if (c == '>') {
int n = 0;
c = fgetc(in);
while (c != EOF && c != '\n') {
if (n < sizeof(ref) - 1) ref[n++] = c;
c = fgetc(in);
}
ref[n] = '\0';
} else {
int len = 0;
int n = 0;
while (c != EOF && c != '\n') {
n++;
if (!isspace(c)) len = n;
c = fgetc(in);
}
fprintf(out, "%s: %d\n", ref, len);
nseq++;
}
}
fprintf(out, "Total number of sequences. %d\n", nseq);
fclose(in);
fclose(out);
return 0;
}
Notes:
fgetc reads a single byte from a file and returns this byte or EOF when the file has ended. In this implementation, that's the only reading function used.
Storing a reference string is implemented via fgetc here too. You could probably use fgets after skipping the initial angle bracket, too.
The counting just reads bytes without storing them. n is the total count, len is the count up to the last non-space. (Your lines probably consist only of ACGT without any trailing space, so you could skip the test for space and use n instead of len.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]){
FILE *my_file = fopen(argv[1], "r");
FILE *my_output = fopen(argv[2], "w");
int total_sequence_coutns = 0;
char *sequence_name;
int dna_length;
char *line = NULL;
size_t size = 0;
while(-1 != getline(&line, &size, my_file)){
if(line[0] == '>'){
sequence_name = strdup(strtok(line, ">\n"));
total_sequence_coutns +=1;
continue;
}
dna_length = strlen(strtok(line, "\n"));
fprintf(my_output, "%s %d\n", sequence_name, dna_length);
free(sequence_name);
}
fprintf(my_output, "Total number of sequences = %d\n", total_sequence_coutns);
fclose(my_file);
fclose(my_output);
free(line);
return (0);
}