I have created a function where I can read the first x amount of bytes from a file. The file is a binary file, and contains NULL characters. I am trying to read the file into a char* however it gives an incorrect length because I am appending NULL characters to the string. Is there a workaround to storing data from a file in a string
// readlen is the amount I want to read from the start of a file
FILE* fptr = fopen(path, "rb");
char* contents = (char*)malloc(readlen + 1);
int read = 0;
int ch;
while ((ch = fgetc(fptr)) != EOF && read != readlen) {
contents[read++] = (char)ch;
}
contents[readlen] = '\0';
fclose(fptr);
return contents;
First, a terminology misconception in the title "Read null characters from file into string". In C, string can not contain NUL characters, because those mark end of the string. A char buffer can contain them, but it's not a string. And when it is not a string with an end marker, you need to keep track of its length.
Then, to solve your problem, you could return a struct with the information
struct buffer {
char *contents;
size_t size;
};
With that, the code you show becomes something like this:
// readlen is the amount I want to read from the start of a file
FILE* fptr = fopen(path, "rb");
struct buffer buffer;
buffer.contents = (char*)malloc(readlen + 1);
buffer.size = 0;
int ch;
while ((ch = fgetc(fptr)) != EOF && buffer.size != readlen) {
buffer.contents[buffer.size++] = (char)ch;
}
buffer.contents[buffer.size] = '\0';
fclose(fptr);
return buffer;
Further improvement of your function would be to use fread to do the read with one function call. You might discover, that your own function becomes unnecessary and you could just directly call fread, even.
Also, putting extra 0 at the end of the char buffer might not be useful, since it is not a string, but it doesn't really hurt and will be convenient if you ever read a text file and want to print it or something. Still, unless you have some specific need for it, I'd consider removing that extra byte.
Related
FILE *fp = fopen("story.txt", "r");
if(fp == NULL){
printf("\nError opening file.\nExiting program.\n");
exit(1);
}
char text[100];
while(fgets(text, 100, fp) != NULL){
printf("%s", text);
}
printf("\n");
fclose(fp);
I'm trying to print the first 100 characters of a text file, including new lines, however when I use the code above it presents some weird behavior. First of all, it only prints the very last line of the text file, which itself is under 100 characters. Also, if I include two print statements in the while loop i.e.
while(fgets(text, 100, fp) != NULL){
printf("%s", text);
printf("%s", text);
}
It prints a lot more than 125 chars of the text file (somewhere in the thousands, it's a big text file), and the contents of said text is a bunch of seemingly random segments from the file in one constant stream, no new lines or anything.
So I guess my question is is there any way to use fgets so that it prints the text in the file, starting from the top, and includes new lines? I eventually have to use this to turn a text file into a character array, so that I can make a new, modified character array based off of that array, which will be printed to a new text file. So if there is a better way to approach that end goal, that would be appreciated.
EDIT: after some discussion in the comments I've realized that the text I am using is just one big block of text with carriage returns and no newlines. I guess at this point my main problem is how to turn this text file with carriage returns into a character array.
If the goal is to read a text file in lines of 100 characters, and to do away with the carriage returns, you can still use fgets() as long as you remember that fgets() will take characters up to and including the next newline, or until one less than the specified number of characters has been read.
The code below reads a line of text, up to BUFFER_SZ-1 characters, increases the memory allocation to hold a new line of text, and copies the line into the allocated space, removing carriage returns and any trailing newlines. Note that the address of the reallocated space is first stored in a temporary pointer. realloc() returns a null pointer in the event of an allocation error, and this step avoids a potential memory leak.
Since the lines are broken at BUFFER_SZ-1 characters, words may be split across lines, and you may want to develop additional logic to handle this. It would be more efficient to reallocate in larger chunks and less frequently, rather than once for every line, as is done here. Also note that it may be useful to open the file in binary mode to more closely parse line endings.
#include <stdio.h>
#include <stdlib.h>
#define BUFFER_SZ 100
int main(void)
{
FILE *fp = fopen("story.txt", "r");
if (fp == NULL) {
fprintf(stderr, "Unable to open file\n");
exit(EXIT_FAILURE);
}
char buffer[BUFFER_SZ];
char (*text)[sizeof buffer] = NULL;
char (*temp)[sizeof buffer] = NULL;
size_t numlines = 0;
while (fgets(buffer, sizeof buffer, fp) != NULL) {
++numlines;
/* Allocate space for next line */
temp = realloc(text, sizeof(*text) * numlines);
if (temp == NULL) {
fprintf(stderr, "Error in realloc()\n");
exit(EXIT_FAILURE);
}
text = temp;
/* Copy buffer to text, removing carriage returns and newlines */
char *c = buffer;
char *line = text[numlines-1];
while (*c != '\n' && *c != '\0') {
if (*c != '\r') {
*line++ = *c;
}
++c;
}
*c = '\0';
}
if (fclose(fp) != 0) {
fprintf(stderr, "Unable to close file\n");
}
for (size_t i = 0; i < numlines; i++) {
printf("%s\n", text[i]);
}
free(text);
return 0;
}
Another option would be to replace the carriage returns with newlines. This may be what OP had in mind. The above program is easily modified to accomplish this. Note that the \n is removed from the printf() statement that displays the results, since newlines are now included in the strings.
...
/* Copy buffer to text, converting carriage returns to newlines */
char *c = buffer;
char *line = text[numlines-1];
while (*c != '\n' && *c != '\0') {
if (*c == '\r') {
*line++ = '\n';
} else {
*line++ = *c;
}
++c;
}
*c = '\0';
}
if (fclose(fp) != 0) {
fprintf(stderr, "Unable to close file\n");
}
for (size_t i = 0; i < numlines; i++) {
printf("%s", text[i]);
}
...
It doesn't copy one line onto the end of another. It simply reuses the buffer you keep passing it. If you want multiple lines stored, copy them to another buffer, and concatenate them. (See: strcat)
I was looking for a solution on how to read char by char on each line from a txt file and I found one, but I don't get some parts of the code. This is it:
#include <stdio.h>
#include <stdlib.h>
void handle_line(char *line) {
printf("%s", line);
}
int main(int argc, char *argv[]) {
int size = 1024, pos;
int c;
char *buffer = (char *)malloc(size);
FILE *f = fopen("myfile.txt", "r");
if(f) {
do { // read all lines in file
pos = 0;
do{ // read one line
c = fgetc(f);
if(c != EOF) buffer[pos++] = (char)c;
if(pos >= size - 1) { // increase buffer length - leave room for 0
size *=2;
buffer = (char*)realloc(buffer, size);
}
}while(c != EOF && c != '\n');
buffer[pos] = 0;
// line is now in buffer
handle_line(buffer);
} while(c != EOF);
fclose(f);
}
free(buffer);
return 0;
}
It was written by someone from here, but I can't reply 'cause I need more points lol. The parts I don't understand are:
if(c != EOF) buffer[pos++] = (char)c;
What does buffer[pos++] do? does it actually increase the variable "pos"? also, why does it start at 1 instead of 0? (pos starts at 0).
I can't really get track of the variable "pos", and I don't know why here buffer[pos] is 0:
buffer[pos] = 0;
The way I read the code is:
declare the size of the buffer that contains every char of every line (I mean, buffer is just free'd at the end, so it keeps the information on every line right?), then declare the other variables and alloc the memory of the buffer.
Open the file myfile.txt, and if it's not null, make pos = 0, then make "c" to store the first character of the file (now the function points to the next char), then if c != EOF meaning the end of file is not reached, save the character "c" in the position 1 of the buffer (I get confused here, why 1 and not 0). Then realloc twice as memory as before if needed. Do that for every character in the line untile you reach EOF or a \n. Now make buffer[pos] = 0, I dont know what value "pos" has, and I assume he makes buffer[pos] = 0 to indicate the end of the line? idk. Print the line, do that until you reach the end of the file. Close the file, free the memory on buffer.
Help! thanks.
fgetc(fp) - Reads the next character from the specified input stream (fp) and advances the associated file position indicator (you do not need to). If successful, the function will return the character read; otherwise, the value EOF (-1) is returned.
Here is a very simple example of using fgetc() to read each character of a file (and write it to another file using fputc())
char filename1[]={"c:\\play\\_in.txt"};//change paths as needed
char filename2[]={"c:\\play\\_out.txt"};
int main(void)
{
FILE *fp1 = {0};
FILE *fp2 = {0};
int c=0;
fp1 = fopen(filename1, "r");
if(fp1)
{
fp2 = fopen (filename2, "w");
if(fp2)
{
c = fgetc(fp1);
while(c != EOF)
{
fputc(c, fp2);
c = fgetc(fp1);
}
fclose(fp2);
}
fclose(fp1);
}
return 0;
}
For example, if I had a file name random.txt, which reads:
This is a string.
Abc
Zxy
How would you save the characters in random.txt to a string or array that includes all of the characters in the text file?
So far I have (using redirection for file)
#include <stdio.h>
#include <string.h>
int main () {
int c;
do {
c = fgetc(stdin);
putchar(c);
} while (c != EOF);
return 0;
}
First part: About file handles
The stdin variable holds a FILE handle to which the user input is redirected (the FILE data type is defined in stdio.h). You can create handles to files using the function FILE *fopen(const char *path, const char *mode).
Your example applied to a regular file would be something like this (no error checking is done):
int main() {
int c;
FILE *myfile = fopen("path/to/file", "r"); //Open file for reading
while(!feof(myfile)) {
c = fgetc(myfile);
//do stuff with 'c'
//...
}
fclose(myfile); //close the file
return 0;
}
More information about fopen here: http://linux.die.net/man/3/fopen
Second part: About C strings
C strings (char arrays terminated with the null character '\0') can be defined in several ways. One of them is by statically defining them:
char mystring[256]; //This defines an array of 256 bytes (255 characters plus end null)
It is very important to take care about the limits of the buffer. In our example, writing beyond 256 bytes in the buffer will make the program crash. If we assume our file will not have lines longer than 255 characters (including line terminators like \r and \n) we can use the fgets function (http://linux.die.net/man/3/fgets):
char *fgets(char *s, int size, FILE *stream);
Simple (newbie) example:
int main() {
char mystring[256];
FILE *myfile = fopen("path/to/file", "r"); //Open file for reading
while(!feof(myfile)) {
if (fgets(mystring, sizeof(mystring), myfile) != NULL) {
printf("%s", mystring);
}
}
fclose(myfile); //close the file
return 0;
}
Notice that fgets is used for reading lines. If you want to read characters 1 by 1, you should keep using fgetc and pushing them manually into a buffer.
Finally, if you want to read a whole text file into a C string (no error checking):
int main() {
FILE *myfile = fopen("path/to/file", "r"); //Open file for reading
//Get the file size
fseek(myfile, 0, SEEK_END);
long filesize = ftell(myfile);
fseek(myfile, 0, SEEK_SET);
//Allocate buffer dynamically (not statically as in previous examples)
//We are reserving 'filesize' bytes plus the end null required in all C strings.
char *mystring = malloc(filesize + 1); //'malloc' is defined in stdlib.h
fread(mystring, filesize, 1, myfile); //read all file
mystring[filesize] = 0; //write the end null
fclose(myfile); //close file
printf("%s", mystring); //dump contents to screen
free(mystring); //deallocate buffer. 'mystring' is no longer usable.
return 0;
}
The following will work on either stdin or an actual file (i.e. you can replace "stream" with stdin, or fopen() it with a filename), dynamically allocating as it goes. After it runs, "ptr" will be a pointer to an array holding the contents of the file.
/* read chars from stream in blocks of 4096 bytes,
dynamically allocating until eof */
size_t bytes_read = 0;
char * ptr = NULL;
while (1) {
size_t chunk_read;
/* increase size of allocation by 4096 bytes */
ptr = realloc(ptr, bytes_read + 4096);
/* read up to 4096 bytes to the newest portion of allocation */
chunk_read = fread(ptr + bytes_read, 1, 4096, stream);
bytes_read += chunk_read;
/* if fread() got less than the full amount of characters, break */
if (chunk_read < 4096) break;
}
/* resize pointer downward to actual number of bytes read,
plus an explicit null termination */
bytes_read += 1;
ptr = realloc(ptr, bytes_read);
ptr[bytes_read - 1] = '\0';
I have a text file named test.txt
I want to write a C program that can read this file and print the content to the console (assume the file contains only ASCII text).
I don't know how to get the size of my string variable. Like this:
char str[999];
FILE * file;
file = fopen( "test.txt" , "r");
if (file) {
while (fscanf(file, "%s", str)!=EOF)
printf("%s",str);
fclose(file);
}
The size 999 doesn't work because the string returned by fscanf can be larger than that. How can I solve this?
The simplest way is to read a character, and print it right after reading:
int c;
FILE *file;
file = fopen("test.txt", "r");
if (file) {
while ((c = getc(file)) != EOF)
putchar(c);
fclose(file);
}
c is int above, since EOF is a negative number, and a plain char may be unsigned.
If you want to read the file in chunks, but without dynamic memory allocation, you can do:
#define CHUNK 1024 /* read 1024 bytes at a time */
char buf[CHUNK];
FILE *file;
size_t nread;
file = fopen("test.txt", "r");
if (file) {
while ((nread = fread(buf, 1, sizeof buf, file)) > 0)
fwrite(buf, 1, nread, stdout);
if (ferror(file)) {
/* deal with error */
}
fclose(file);
}
The second method above is essentially how you will read a file with a dynamically allocated array:
char *buf = malloc(chunk);
if (buf == NULL) {
/* deal with malloc() failure */
}
/* otherwise do this. Note 'chunk' instead of 'sizeof buf' */
while ((nread = fread(buf, 1, chunk, file)) > 0) {
/* as above */
}
Your method of fscanf() with %s as format loses information about whitespace in the file, so it is not exactly copying a file to stdout.
There are plenty of good answers here about reading it in chunks, I'm just gonna show you a little trick that reads all the content at once to a buffer and prints it.
I'm not saying it's better. It's not, and as Ricardo sometimes it can be bad, but I find it's a nice solution for the simple cases.
I sprinkled it with comments because there's a lot going on.
#include <stdio.h>
#include <stdlib.h>
char* ReadFile(char *filename)
{
char *buffer = NULL;
int string_size, read_size;
FILE *handler = fopen(filename, "r");
if (handler)
{
// Seek the last byte of the file
fseek(handler, 0, SEEK_END);
// Offset from the first to the last byte, or in other words, filesize
string_size = ftell(handler);
// go back to the start of the file
rewind(handler);
// Allocate a string that can hold it all
buffer = (char*) malloc(sizeof(char) * (string_size + 1) );
// Read it all in one operation
read_size = fread(buffer, sizeof(char), string_size, handler);
// fread doesn't set it so put a \0 in the last position
// and buffer is now officially a string
buffer[string_size] = '\0';
if (string_size != read_size)
{
// Something went wrong, throw away the memory and set
// the buffer to NULL
free(buffer);
buffer = NULL;
}
// Always remember to close the file.
fclose(handler);
}
return buffer;
}
int main()
{
char *string = ReadFile("yourfile.txt");
if (string)
{
puts(string);
free(string);
}
return 0;
}
Let me know if it's useful or you could learn something from it :)
Instead just directly print the characters onto the console because the text file maybe very large and you may require a lot of memory.
#include <stdio.h>
#include <stdlib.h>
int main() {
FILE *f;
char c;
f=fopen("test.txt","rt");
while((c=fgetc(f))!=EOF){
printf("%c",c);
}
fclose(f);
return 0;
}
Use "read()" instead o fscanf:
ssize_t read(int fildes, void *buf, size_t nbyte);
DESCRIPTION
The read() function shall attempt to read nbyte bytes from the file associated with the open file descriptor, fildes, into the buffer pointed to by buf.
Here is an example:
http://cmagical.blogspot.com/2010/01/c-programming-on-unix-implementing-cat.html
Working part from that example:
f=open(argv[1],O_RDONLY);
while ((n=read(f,l,80)) > 0)
write(1,l,n);
An alternate approach is to use getc/putc to read/write 1 char at a time. A lot less efficient. A good example: http://www.eskimo.com/~scs/cclass/notes/sx13.html
You can use fgets and limit the size of the read string.
char *fgets(char *str, int num, FILE *stream);
You can change the while in your code to:
while (fgets(str, 100, file)) /* printf("%s", str) */;
Two approaches leap to mind.
First, don't use scanf. Use fgets() which takes a parameter to specify the buffer size, and which leaves any newline characters intact. A simple loop over the file that prints the buffer content should naturally copy the file intact.
Second, use fread() or the common C idiom with fgetc(). These would process the file in fixed-size chunks or a single character at a time.
If you must process the file over white-space delimited strings, then use either fgets or fread to read the file, and something like strtok to split the buffer at whitespace. Don't forget to handle the transition from one buffer to the next, since your target strings are likely to span the buffer boundary.
If there is an external requirement to use scanf to do the reading, then limit the length of the string it might read with a precision field in the format specifier. In your case with a 999 byte buffer, then say scanf("%998s", str); which will write at most 998 characters to the buffer leaving room for the nul terminator. If single strings longer than your buffer are allowed, then you would have to process them in two pieces. If not, you have an opportunity to tell the user about an error politely without creating a buffer overflow security hole.
Regardless, always validate the return values and think about how to handle bad, malicious, or just malformed input.
You can use getline() to read your text file without worrying about large lines:
getline() reads an entire line from stream, storing the address of the buffer containing the text into *lineptr. The buffer is null-terminated and includes the newline character, if one was found.
If *lineptr is set to NULL before the call, then getline() will allocate a buffer for storing the line. This buffer should be freed by the user program even if getline() failed.
bool read_file(const char *filename)
{
FILE *file = fopen(filename, "r");
if (!file)
return false;
char *line = NULL;
size_t linesize = 0;
while (getline(&line, &linesize, file) != -1) {
printf("%s", line);
free(line);
}
free(line);
fclose(file);
return true;
}
You can use it like this:
int main(void)
{
if (!read_file("test.txt")) {
printf("Error reading file\n");
exit(EXIT_FAILURE);
}
}
I use this version
char* read(const char* filename){
FILE* f = fopen(filename, "rb");
if (f == NULL){
exit(1);
}
fseek(f, 0L, SEEK_END);
long size = ftell(f)+1;
fclose(f);
f = fopen(filename, "r");
void* content = memset(malloc(size), '\0', size);
fread(content, 1, size-1, f);
fclose(f);
return (char*) content;
}
You could read the entire file with dynamic memory allocation, but isn't a good idea because if the file is too big, you could have memory problems.
So is better read short parts of the file and print it.
#include <stdio.h>
#define BLOCK 1000
int main() {
FILE *f=fopen("teste.txt","r");
int size;
char buffer[BLOCK];
// ...
while((size=fread(buffer,BLOCK,sizeof(char),f)>0))
fwrite(buffer,size,sizeof(char),stdout);
fclose(f);
// ...
return 0;
}
Using C I would like to read in the contents of a text file in such a way as to have when all is said and done an array of strings with the nth string representing the nth line of the text file. The lines of the file can be arbitrarily long.
What's an elegant way of accomplishing this? I know of some neat tricks to read a text file directly into a single appropriately sized buffer, but breaking it down into lines makes it trickier (at least as far as I can tell).
Thanks very much!
Breaking it down into lines means parsing the text and replacing all the EOL (by EOL I mean \n and \r) characters with 0.
In this way you can actually reuse your buffer and store just the beginning of each line into a separate char * array (all by doing only 2 passes).
In this way you could do one read for the whole file size+2 parses which probably would improve performance.
It's possible to read the number of lines in the file (loop fgets), then create a 2-dimensional array with the first dimension being the number of lines+1. Then, just re-read the file into the array.
You'll need to define the length of the elements, though. Or, do a count for the longest line size.
Example code:
inFile = fopen(FILENAME, "r");
lineCount = 0;
while(inputError != EOF) {
inputError = fscanf(inFile, "%s\n", word);
lineCount++;
}
fclose(inFile);
// Above iterates lineCount++ after the EOF to allow for an array
// that matches the line numbers
char names[lineCount][MAX_LINE];
fopen(FILENAME, "r");
for(i = 1; i < lineCount; i++)
fscanf(inFile, "%s", names[i]);
fclose(inFile);
For C (as opposed to C++), you'd probably wind up using fgets(). However, you might run into issues due to your arbitrary length lines.
Perhaps a Linked List would be the best way to do this?
The compiler won't like having an array with no idea how big to make it. With a Linked List you can have a really large text file, and not worry about allocating enough memory to the array.
Unfortunately, I haven't learned how to do linked lists, but maybe somebody else could help you.
If you have a good way to read the whole file into memory, you are almost there. After you've done that you could scan the file twice. Once to count the lines, and once to set the line pointers and replace '\n' and (and maybe '\r' if the file is read in Windows binary mode) with '\0'. In between scans allocate an array of pointers, now that you know how many you need.
you can use this way
#include <stdlib.h> /* exit, malloc, realloc, free */
#include <stdio.h> /* fopen, fgetc, fputs, fwrite */
struct line_reader {
/* All members are private. */
FILE *f;
char *buf;
size_t siz;
};
/*
* Initializes a line reader _lr_ for the stream _f_.
*/
void
lr_init(struct line_reader *lr, FILE *f)
{
lr->f = f;
lr->buf = NULL;
lr->siz = 0;
}
/*
* Reads the next line. If successful, returns a pointer to the line,
* and sets *len to the number of characters, at least 1. The result is
* _not_ a C string; it has no terminating '\0'. The returned pointer
* remains valid until the next call to next_line() or lr_free() with
* the same _lr_.
*
* next_line() returns NULL at end of file, or if there is an error (on
* the stream, or with memory allocation).
*/
char *
next_line(struct line_reader *lr, size_t *len)
{
size_t newsiz;
int c;
char *newbuf;
*len = 0; /* Start with empty line. */
for (;;) {
c = fgetc(lr->f); /* Read next character. */
if (ferror(lr->f))
return NULL;
if (c == EOF) {
/*
* End of file is also end of last line,
` * unless this last line would be empty.
*/
if (*len == 0)
return NULL;
else
return lr->buf;
} else {
/* Append c to the buffer. */
if (*len == lr->siz) {
/* Need a bigger buffer! */
newsiz = lr->siz + 4096;
newbuf = realloc(lr->buf, newsiz);
if (newbuf == NULL)
return NULL;
lr->buf = newbuf;
lr->siz = newsiz;
}
lr->buf[(*len)++] = c;
/* '\n' is end of line. */
if (c == '\n')
return lr->buf;
}
}
}
/*
* Frees internal memory used by _lr_.
*/
void
lr_free(struct line_reader *lr)
{
free(lr->buf);
lr->buf = NULL;
lr->siz = 0;
}
/*
* Read a file line by line.
* http://rosettacode.org/wiki/Read_a_file_line_by_line
*/
int
main()
{
struct line_reader lr;
FILE *f;
size_t len;
char *line;
f = fopen("foobar.txt", "r");
if (f == NULL) {
perror("foobar.txt");
exit(1);
}
/*
* This loop reads each line.
* Remember that line is not a C string.
* There is no terminating '\0'.
*/
lr_init(&lr, f);
while (line = next_line(&lr, &len)) {
/*
* Do something with line.
*/
fputs("LINE: ", stdout);
fwrite(line, len, 1, stdout);
}
if (!feof(f)) {
perror("next_line");
exit(1);
}
lr_free(&lr);
return 0;
}