I'm a beginner to C and wanted to code a simple function that reads the content of file and returns it as a string, as an exercise.
Here is my solution which I think works, but is there any obvious bad practices or unoptimal code here ? For example, I manually added a \0 at the end of the string, but I don't know if it is really necessary...
#include <stdio.h>
#include <stdlib.h>
char *readFile(char *path)
{
//open file
FILE *file = fopen(path, "r");
//if broken
if (file == NULL)
{
printf("Erreur");
return NULL;
}
//return variable
char *result;
//length of the file
int len;
fseek(file, 0, SEEK_END);
len = ftell(file);
fseek(file, 0, SEEK_SET);
//initialising return variable
result = (char*) malloc(sizeof(char) * (len + 1));
int c;
int i = 0;
while (feof(file) == 0)
{
c = fgetc(file);
if (c != EOF)
{
printf("%04x -> %c\n", c, c);
*(result + i) = c;
i++;
}
}
*(result + i) = '\0';
printf("len : %i\n", len);
fclose(file);
return result;
}
I'd replace this:
int c;
int i = 0;
while (feof(file) == 0)
{
c = fgetc(file);
if (c != EOF)
{
printf("%04x -> %c\n", c, c);
*(result + i) = c;
i++;
}
}
with this:
fread(file, 1, len, result);
It's much shorter
It's correct
It's certainly faster
There is still room for improvement though, for example you could add error handling, fread can fail.
Since you have already got the length of the file to be read, you could also read them at once instead char-by-char.
Another implmentation of your function, for example:
char *readFile(char *path)
{
//open file
FILE *file = fopen(path, "r");
//if broken
if (file == NULL)
{
printf("Erreur");
return NULL;
}
//return variable
char *result;
//length of the file
int len;
fseek(file, 0, SEEK_END);
len = ftell(file);
fseek(file, 0, SEEK_SET);
//initialising return variable
result = (char*) malloc(sizeof(char) * (len + 1));
size_t i = fread(result, sizeof(char), len, file);
*(result + i) = '\0';
printf("len : %i\n", len);
fclose(file);
return result;
}
Related
I wrote a function which should check if a word is included in a file, but my function returns always NOT_EXISTENT, why? I checked ptr and its always empty but the memory is located.
Here my function:
int search_for_word(char wort[]) {
char *ptr;
FILE *file;
unsigned long size_of_file = 0;
file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
fseek(file, 0L, SEEK_END);
size_of_file = ftell(file);
ptr = malloc(sizeof(char) * size_of_file + 1);
printf("Size:%li\n", size_of_file);
if (ptr == NULL) {
return ERROR;
}
fread(ptr, sizeof(char), size_of_file, file);
if (strstr(ptr, wort) == NULL) {
return NOT_EXISTENT;
}
fclose(file);
return EXISTENT;
}
At least these problems:
(Biggest issue) Missing rewind #alinsoar
fread() is attempting a read from the end of the file. Move back to the beginning.
rewind(file); // Add
size_t length = fread(ptr, sizeof(char), size_of_file, file);
Not a string #pm100
ptr is not certainly a string as it may lack a null character. strstr() expects 2 strings.
strstr(ptr, wort) // bad
Instead, append a null character to the data read before strstr().
size_t length = fread(ptr, sizeof(char), size_of_file, file);
ptr[length] = '\0'; // Add
Failure to close
Code selectively performs fclose(file). Call fclose() with each successful fopen().
Missing free() #Weather Vane
Free allocated memory when done.
wort[] may be ill formed
Posted code does not show the origin of wort[]. So recommendations are guesses at best.
No check on fseek() success
// fseek(file, 0L, SEEK_END)
if (fseek(file, 0 /* L not needed */, SEEK_END) == -1) {
Handle_error();
}
Better with a const #chqrlie
This allows passing constant strings.
// int search_for_word(char wort[]){
int search_for_word(const char wort[]) {
Minor
Size sizeof(char) * size_of_file + 1 may exceed SIZE_MAX.
sizeof(char) * size_of_file + 1 conceptually wrong. Better as sizeof(char) * (size_of_file + 1) or just size_of_file + 1u.
Some rough alternative code - unchecked.
// Let calling code open the file
// Return 1 on success.
// Return 0 on no-find.
// Return -1 on other failures.
int search_for_word(const char *word, FILE *inf) {
if (inf == 0) {
return -1;
}
size_t length_word = strlen(word);
if (length_word >= SIZE_MAX / 2) {
return -1; // TBD code to handle this extreme case
}
size_t buf_size = 4096; // Adjust as desired
if (buf_size <= length_word * 2) {
buf_size = length_word * 2 + 1;
}
char *buf = malloc(buf_size);
if (buf == NULL) {
return -1;
}
char *in = buf;
size_t in_length = 0;
for (;;) {
size_t length_read = fread(in, 1, buf_size, inf);
in[length_read] = '\0';
if (strstr(buf, word)) {
free(buf);
return 1;
}
if (length_read < buf_size) { // no more data expected
free(buf);
return 0;
}
// Copy last portion of buffer to the beginning.
in_length += length_read;
memmove(buf, &buf[in_length - length_word], length_word);
in_length = length_word;
in = buf + in_length;
}
}
Here is a modified version implementing suggestions from chux's answer and with an alternative method for huge files (which should probably be used for all files):
int search_for_word(const char *wort) {
int res = NOT_EXISTENT;
FILE *file = fopen("array.txt", "r");
if (file == NULL) {
return ERROR;
}
#if 0 // set to 1 if you want to load the whole file in memory
if (fseek(file, 0L, SEEK_END) == -1) {
fclose(file);
return ERROR;
}
long size_of_file = ftell(file);
if (size_of_file < 0) {
fclose(file);
return ERROR;
}
rewind(file);
if ((unsigned long)size_of_file + 1 <= SIZE_MAX) {
char *ptr = malloc((size_t)size_of_file + 1);
if (ptr != NULL) {
size_t length = fread(ptr, 1, size_of_file, file);
ptr[length] = '\0';
res = strstr(ptr, wort) ? EXISTENT : NOT_EXISTENT;
free(ptr);
fclose(file);
return res;
}
}
#endif
/* use a different method: read 4KB at a time */
size_t len = strlen(wort);
char buf[4096 + len + 1];
size_t nread, pos = 0;
while ((nread = fread(buf + pos, 1, 4096, file)) > 0) {
buf[pos + nread] = '\0';
if (strstr(buf, wort)) {
res = EXISTENT;
break;
}
if (pos + nread <= len) {
pos += nread;
} else {
memmove(buf, buf + pos + nread - len, len);
pos = len;
}
}
fclose(file);
return res;
}
void test(char *buffer, int size)
{
int length = strlen(buffer);
for (int i = 0; i <= length; ++i)
{
if (buffer[i] == '"')
{
int _size = i + size;
if (_size > length)
continue;
if (buffer[i + size] == '"')
{
}
}
}
}
This is how I read the file.
FILE *file = NULL;
size_t filesize = 0;
uint8_t *filebuffer = 0;
file = fopen("tokens.txt", "r");
if (file)
{
fseek(file, 0, SEEK_END);
filesize = ftell(file);
fseek(file, 0, SEEK_SET);
filebuffer = calloc(filesize + 1, 1);
if (filebuffer)
{
fread(filebuffer, 1, filesize, file);
for (size_t i = 0; i < filesize; i++)
{
if (filebuffer[i] == 0)
filebuffer[i] = '.';
}
char array[filesize];
strncpy(array, filebuffer, filesize);
array[filesize] = '\0';
test(array, 59);
}
}
"array" is char array[filesize];, "filesize" is ftell(file); (the file is valid and not NULL) the content of the file is asd"12345678912345678912345678912345678911111231231231231231232"asdasdasdasdasdasdss
for some weird reason it reaches to the "continue;" when the statement is not true...
Edit: I tried printing the values in the block of the if statement and for some reason I receive ->
Size: 122
Length: 84
Someone have any idea on how to solve it?
array[filesize] = '\0'; // access outside of array boundaries
My main problem is when i run the program it not work and a runnig time error is jumping on my screen. Can someone explain me whats the problem and help me?
*argv[i] is the adress and ignore the fact i don't have any free for my mallocs.
int main(int argc, char** argv)
{
FILE* file = 0;
file = fopen(argv[1], "r");
int numOfLines = countLines(file), i = 0, ch = 0, j = 0, flag = 0;
char** content;
content = (char**)malloc(numOfLines * sizeof(char*));
int* charsInLine = (int*)malloc(numOfLines * sizeof(int));
countCharsInLine(file, charsInLine);
fseek(file, 0, SEEK_SET);
for (i = 0; i < numOfLines; i++)
{
int lenOfFile = 0;
fseek(file, 0, SEEK_END);
lenOfFile = ftell(file);
content[i] = (char*)malloc(charsInLine[i] * sizeof(char) + 1);
content = fread(content, 1, lenOfFile, file);
}
for (i = 0; i < numOfLines; i++)
{
printf("%d string = %s", i,content[i]);
}
fclose(file);
getchar();
return 0;
}
I am going to assume countLines and countCharsInLine works correctly
Here is the updated code with comments
int main(int argc, char** argv)
{
FILE* file fopen(argv[1], "r");
if (file == NULL) {
// Output some error message
return EXIT_FAILURE;
}
int numOfLines = countLines(file); // I assume this rewinds.
char** content;
content = malloc(numOfLines * sizeof(char*)); // Do not need a cast
int* charsInLine = malloc(numOfLines * sizeof(int));
countCharsInLine(file, charsInLine);
rewind(file); // Easier to read the fseek
for (int i = 0; i < numOfLines; i++)
{
content[i] = malloc(charsInLine[i] + 1); // * sizeof(char) - Do not need this as sizeof(char) is defined as 1
// Reading one item of size charsInLine[i]
if (fread(content[i], charsInLine[i], 1, file) != 1) {
// Some error has occurred
fclose(file);
return EXIT_FAILURE;
}
content[i][charsInLine[i]] = 0; // Add null character
int ch = fgetc(file);
if (ch != '\n' && ch != EOF) { // Should be reading either the new line at the end of the line, or EOF
// Some error has occurred
fclose(file);
return EXIT_FAILURE;
}
}
fclose(file);
// Should free up the stuff that is malloced - I leave that up to you
return EXIT_SUCCESS;
}
I am reading a string from a file. After like the second or third time the function gets executed, one or more random characters become appended to the buffer string and I have no idea why that happens.
Here's the piece of code:
scorefile = fopen("highscore.dat", "rb");
if (scorefile)
{
fseek(scorefile, 0, SEEK_END);
length = ftell(scorefile);
fseek(scorefile, 0, SEEK_SET);
buffer = malloc(length);
if (buffer)
{
fread(buffer, 1, length, scorefile);
}
fclose(scorefile);
}
Am I doing something wrong here?
Let's spell it all out and go slightly more robust:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
char *loadScoreFile(const char *filename)
{
char *buffer = NULL;
FILE *scorefile = fopen(filename, "r");
if (scorefile != NULL)
{
(void) fseek(scorefile, 0, SEEK_END);
int length = ftell(scorefile);
(void) fseek(scorefile, 0, SEEK_SET);
buffer = malloc(length + 1);
if (buffer != NULL)
{
assert(length == fread(buffer, 1, length, scorefile));
buffer[length] = '\0';
}
(void) fclose(scorefile);
}
return buffer;
}
int main()
{
for (int i = 0; i < 10; i++)
{
char *pointer = loadScoreFile("highscore.dat");
if (pointer != NULL)
{
printf("%s", pointer);
free(pointer);
}
}
return 0;
}
if you use buffer = malloc(length);, and then read length bytes into it, it will be one byte too short. Char arrays in C are zero-terminated, so they need an extra byte to but that zero. buffer = malloc(length+1); will fix this.
I'm quite new to C. I'm trying to write a code that finds a string in a I/O stream, and I don't understand what I'm doing wrong. I know the error is probably in the large while loop (in the code below).
I want the function to return the location in bytes from the beginning of the stream and -1 if it fails for some reason. It just keeps returning -1 for any file I try it on.
long find_string(const char *str, const char *filename, long offset)
{
FILE *f = fopen(filename, "r");
if (!f){
return -1;
}
int s=0,c;
c = fgetc(f);
if(c == EOF){
return -1;
}
char *check = malloc(sizeof(char));
fseek(f, 0L, SEEK_END); // Sees and stores how long the file is
long sz = ftell(f);
fseek(f, 0L, SEEK_SET);
if(fseek(f, offset,SEEK_SET) != 0){ // finds the position of offset
return -1;
}
while(fgetc(f) != EOF){
c = fgetc(f);
if(c == str[0] && ftell(f) < sz){
check[0] = c;
offset = ftell(f);
}
s++;
for (unsigned int r=1; r < (strlen(str));r++){
c = fgetc(f);
if(c == str[s]){
check = realloc(check, sizeof(char)*s);
check[s] = c;
s++;
}
}
if(strcmp(check, str)==0){
free(check);
fclose(f);
break;
}
else{
check = realloc(check, sizeof(char));
offset = -1;
}
}
return offset;}
Any help is greatly appreciated
This would be much easier if you simply memory-mapped the entire file and ran a standard string searching algorithm on it.
For memory mapping, see: Linux - Memory Mapped File
For string searching code, see: strstr() for a string that is NOT null-terminated
Please check the lines with comment updated
long find_string(const char *str, const char *filename, long offset)
{
FILE *f = fopen(filename, "r");
if (!f){
return -1;
}
int s=0,c;
c = fgetc(f);
if(c == EOF){
return -1;
}
char *check = malloc(sizeof(char));
fseek(f, 0L, SEEK_END); // Sees and stores how long the file is
long sz = ftell(f);
fseek(f, 0L, SEEK_SET);
if(fseek(f, offset,SEEK_SET) != 0){ // finds the position of offset
return -1;
}
c = fgetc(f); // Updated
while(c != EOF){ // Updated
if(c == str[0] && ftell(f) < sz){
check[0] = c;
offset = ftell(f);
}
s++;
for (unsigned int r=1; r < (strlen(str));r++){
c = fgetc(f);
if(c == str[s]){
check = realloc(check, sizeof(char)*s);
check[s] = c;
s++;
}
}
if(strcmp(check, str)==0){
free(check);
fclose(f);
break;
}
else{
check = realloc(check, sizeof(char));
offset = -1;
}
c = fgetc(f); //Updated
}
return offset;}
since you are using fgetc at the condition and start of the look, you actually comparing the second char of file with first char of str. update and check.