Copying a file into char array dynamically - c

I have code for copying a text file (not binary) into an array of chars.
I am trying to copy the contents of a .txt file into a char* array.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
bool is_not_binary(const void *data, size_t len)
{
return memchr(data, '\0', len) != NULL;
}
int main(void)
{
char* file_name="./bash_example.sh";
FILE *file = fopen (file_name, "r");
size_t size = 64;
const size_t line_size = 300;
char *mem = malloc(size);
if (mem == NULL) {
perror("malloc");
exit(EXIT_FAILURE);
}
*mem = 0;
if (file != NULL && is_not_binary(file_name,line_size)) {
char* line = malloc(line_size);
while (fgets(line, line_size, file) != NULL) {
size_t total = strlen(mem) + strlen(line) + 1;
if (size < total) {
size_t newsize = (total & ~1U) << 1;
char *tmp = realloc(mem, newsize);
if (tmp == NULL) {
perror("realloc");
exit(EXIT_FAILURE);
}
mem = tmp;
size = newsize;
}
strcat(mem, line);
}
}
printf("%s",mem);
return 0;
}
But in my code I must specify a static size:
size_t size = 64;
const size_t line_size = 300;
I want to remove this and I want dynamic allocation, is it possible?
Actually it's static code, 64 and 300.

I coded up an example that makes use of fopen, fread, and the classic example doubling the buffer size as the content grows.
int main()
{
const size_t initial_size = 1024;
char* contents = (char*)malloc(initial_size);
size_t length = 0;
size_t allocated = initial_size;
FILE* file = fopen("./sbatch_example.sh", "r");
if (file)
{
while (1)
{
size_t remaining = allocated - length;
size_t result = 0;
if (remaining == 0)
{
contents = (char*)realloc(contents, allocated*2);
allocated = allocated*2;
remaining = allocated-length;
}
result = fread(contents+length, 1, remaining, file);
length += result;
if (result==0) /* EOF */
{
break;
}
}
}
if (file)
{
fclose(file);
file = NULL;
}
/* at this point, "contents" is your file data bytes
and "length" is the number of bytes copied into that array*/
/*optional: append a null char to the end of the buffer to make it easier for debugging and print statements */
contents = (char*)realloc(contents, length+1);
contents[length] = '\0';
free(contents);
return 0;
}

You can use ftell() to get the total length in advance and fread() to read a whole at one time as below.
FILE *fp = fopen("./sbatch_example.sh", "r");
if (fp) {
fseek(fp, 0, SEEK_END);
size_t size = ftell(fp);
rewind(fp);
char *mem = malloc(size+1);
size_t nr = fread(mem, size, 1, fp);
}

Related

Difficulty setting pointers in dynamically allocated nested structs

Been at this thing for awhile. I'm writing a C program that parses a csv file into a nested struct. When I pass the ** to the struct into the add_field function, I am able to get all of the pointers until I get to the field[(* f) - 1] pointer of the field array. That always returns NULL and I cannot figure out why. Any help would be greatly appreciated. Thank you.
Sorry for the huge code dump but I didn't want to leave anything out. I've been checking for null after every alloc but haven't incorporated them yet in this rewrite (I’m watching variable values with step in on clion).
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
/* Inline function for handling errors during memory allocation */
extern int errno;
#ifndef error
#define ERROR static inline void *
__attribute((always_inline)) ERROR error() {
printf("%d\n", errno);
printf("%s", strerror(errno));
exit(EXIT_FAILURE);
}
#endif
/* ------------------------------------------------------------ */
#define MAX_BUFFER_SIZE 100
typedef struct data_field {
char * data;
size_t fldsize;
} FIELD;
typedef struct data_set {
int field_count;
size_t setsize;
FIELD ** field;
} SET;
typedef struct csv_file {
char * filename;
int set_count;
size_t filesize;
SET ** set;
} CSV;
CSV * alloc_csv();
void add_set(CSV ** fp, const int * setcnt);
void add_field(SET **sp, char *buffer, const int *f);
char * file_read(char * file_name);
int main()
{
int b = 0;
ulong bufcnt = 0;
int fldcnt = 0;
int setcnt = 0;
int bmax = 100;
char tok1 = '\n';
char tok2 = ',';
char * buffer;
char * stream;
stream = file_read("/home/jonathon/Documents/programming/personal/csv/data_files/MOCK_DATA.csv");
CSV * file = {0};
void * filetmp = malloc(sizeof(CSV));
file = filetmp;
file->set_count = 0;
void * arrtmp = calloc(1, sizeof(SET *));
file->set = (SET **)arrtmp;
void * settmp = calloc(1, sizeof(SET));
file->set[0] = (SET *)settmp;
setcnt++;
void * buftmp = malloc(sizeof(char) * MAX_BUFFER_SIZE);
buffer = buftmp;
// read stream until end of field
buftmp = malloc(sizeof(char) * MAX_BUFFER_SIZE);
buffer = buftmp;
for (int c = 0; stream[c] != '\0'; c++)
{
if (b >= bmax)
{
buftmp = realloc(buffer, sizeof(char) * (MAX_BUFFER_SIZE + bmax));
buffer = buftmp;
}
switch (stream[c])
{
case 10:
buffer[b] == '\0';
b = 0;
break;
case 44:
buffer[b] == '\0';
add_field(&file->set[setcnt - 1], buffer, &fldcnt);
fldcnt++;
b = 0;
break;
default:
buffer[b] = stream[c];
b++;
}
}
}
void add_field(SET ** sp, char * buffer, const int * f)
{
ulong buflen = strlen(buffer + 1);
if ((*f) == 0)
{
(*sp)->field = (FIELD **)calloc(1, sizeof(FIELD *));
}
else
{
(*sp)->field = (FIELD **)realloc((*sp)->field, sizeof(FIELD *) * ((*f) + 1));
}
(*sp)->field[(*f) - 1] = (FIELD *)calloc(1, sizeof(FIELD));
(*sp)->field[(*f) - 1]->data = (char *)calloc(buflen, sizeof(char));
memcpy((*sp)->field[(*f) - 1]->data, buffer, buflen * sizeof(char));
}
void free_csv(CSV ** fp, const int * setcnt, const int * fldcnt)
{
for (int i = 0; i < * setcnt; i++)
{
for (int j = 0; j < * fldcnt; j++)
{
free((* fp)->set[i]->field[j]->data);
free((* fp)->set[i]->field[j]);
}
free((* fp)->set[i]->field);
free((* fp)->set[i]);
}
free((* fp)->set);
free(* fp);
}
char *file_read(char* file_name)
{
FILE *fp = fopen(file_name, "rb");
size_t file_size;
char* file_buffer = NULL;
if (!fp)
{
perror("Error: ");
exit(EXIT_FAILURE);
}
else
{
// Seek to end of file to get file file_size
fseek(fp, 0, SEEK_END);
file_size = ftell(fp);
file_buffer = (char *)calloc(file_size, sizeof(char)); // Allocate buffer file_size
if (!file_buffer)
{
perror("Error: ");
exit(EXIT_FAILURE);
}
// Seek to beginning of file to read from start.
fseek(fp, 0, SEEK_SET);
if (fread(file_buffer, file_size, 1, fp) != 1) // Read into buffer
{
perror("Error: ");
exit(EXIT_FAILURE);
}
fclose(fp);
}
return file_buffer;
}```

Allocate memory for big .txt file in C

I need to allocate memory using malloc or calloc, for a large file that looks like this:
2357 VKLYKK
7947 1WTFWZ
3102 F2IXK3
2963 EXMW55
2865 50CJES
2510 8PC1AI
There are around 10K of lines in that .txt file. How can I allocate the required memory?
What is the program supposed to do? The program has to read the whole .txt file. Sort it by the first number and send output to out.txt. But since the the input of the file is huge it won't let me.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma warning(disable : 4996)
typedef struct {
int number;
char order[10];
} Data;
int sorting(const void *a, const void *b)
{
Data *dataA = (Data *)a;
Data *dataB = (Data *)b;
// return (dataA->number - dataB->number); // Ascending order
return (dataB->number - dataA->number); // Descending order
}
int main()
{
FILE *fp;
FILE *f = fopen("out.txt", "w");
Data data[20];
char *line[150]
int i = 0;
char file_name[10] = "";
printf("enter file name: ");
scanf("%s", &file_name);
fp = fopen(file_name, "r");
if (fp == NULL)
{
printf("\n%s\" File not found!", file_name);
exit(1);
}
while (1)
{
if (fgets(line, 150, fp) == NULL)
break;
char *pch;
pch = strtok(line, " ");
data[i].number = atoi(pch);
pch = strtok(NULL, " ");
strcpy(data[i].order, pch);
i++;
}
printf("#################\n");
printf("number\torder\n");
for (int k = 0; k < 10; k++)
{
printf("%d\t%s", data[k].number, data[k].order);
}
qsort(data, 10, sizeof(Data), sorting);
printf("\n#################\n");
printf("number\torder\n");
for (int k = 0; k < 10; k++)
{
printf("%d\t%s", data[k].number, data[k].order);
fprintf(f, "%d\t%s", data[k].number, data[k].order);
}
fclose(fp);
fclose(f);
return 0;
}
If your file contains 10,000 lines or so, your while loop will quickly overrun your data array (which you declared with only 20 elements). If the number of lines is not known in advance, the best way to do this is with a growing array. Start by initialing data (and new dataSize and dataCount variables) as follows:
int dataSize = 0;
int dataCount = 0;
Data *data = NULL;
Then as you use up the space in the array, when it reaches dataSize entries you will have to grow your array. Something like this:
while (1) {
if (dataCount >= dataSize) {
Data *new;
dataSize += 1000;
new = realloc(data,dataSize * sizeof *data);
if (new == NULL) {
perror("realloc");
free(data);
return 2;
}
data = new;
}
int cnt = fscanf(fp,"%d %9s", &data[dataCount].number, data[dataCount].order);
if (cnt == EOF)
break;
if (cnt != 2) {
printf("Error reading data\n");
return 1;
}
dataCount++;
}
When the while loop finishes (if there were no errors), the data array will contain all of the data, and dataCount will be the total number of data items found.
Note that I used fscanf instead of fgets, as this eliminates the need for intermediate step like calls to atoi and strcpy. I also put in some simple error checking. I chose 1000 as the growth increment, though you can change that. But too small and it fragments the heap more rapidly, and too big requires larger amounts of memory too quickly.
this line
char* line[150];
creates an array of 150 char pointers, this is not what you want if you are reading one line like this
if (fgets(line, 150, fp) == NULL) break;
I suspect you wanted one line of 150 chars
so do
char line[150];
You can use qsort to sort the array of lines, but that may not be the best approach. It may be more effective to insert the lines into a data structure that can be easily traversed in order. Although this simple minded solution is very much less than ideal, here's a simple-minded example of inserting into a tree. This sorts the lines lexicographically; modifying it to sort numerically based on the line is a good exercise.
/* Build an (unbalanced) binary search tree of lines in input. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void * xrealloc(void *buf, size_t num, size_t siz, void *end);
FILE * xfopen(const char *path, const char *mode);
struct entry {
const char *line;
struct entry *node[2];
};
static struct entry *
new_node(const char *line)
{
struct entry *e = calloc(1, sizeof *e);
if( e == NULL ){
perror("calloc");
exit(EXIT_FAILURE);
}
e->line = line;
return e;
}
/*
* Note that this tree needs to be rebalanced. In a real
* project, we would use existing libraries.
*/
static struct entry *
lookup(struct entry **lines, const char *line)
{
struct entry *t = *lines;
if( t ){
int cmp = strcmp(line, t->line);
return lookup(&t->node[cmp > 0], line);
} else {
return *lines = new_node(line);
}
}
/* In-order descent of the tree, printing one line per entry */
static void
print_table(const struct entry *t)
{
if( t ){
print_table(t->node[0]);
printf("%s", t->line);
print_table(t->node[1]);
}
}
static void *
xrealloc(void *buf, size_t num, size_t siz, void *endvp)
{
char **endp = endvp;
ptrdiff_t offset = endp && *endp ? *endp - (char *)buf : 0;
buf = realloc(buf, num * siz);
if( buf == NULL ){
perror("realloc");
exit(EXIT_FAILURE);
}
if( endp != NULL ){
*endp = buf + offset;
}
return buf;
}
int
main(int argc, char **argv)
{
FILE *ifp = argc > 1 ? xfopen(argv[1], "r") : stdin;
struct entry *lines = NULL;
char *line = NULL;
size_t cap = 0;
while( getline(&line, &cap, ifp) > 0 ){
(void) lookup(&lines, line);
line = NULL;
}
print_table(lines);
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = path[0] != '-' || path[1] != '\0' ? fopen(path, mode) :
*mode == 'r' ? stdin : stdout;
if( fp == NULL ){
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

Reading the files contents to the allocated string

I am trying to read the contents of a file and copy those contents into a string which has dynamic memory. However my program keeps allocating only 8 bytes to x. Ultimately I'm trying to create a general function that can read contents from a file and then return the contents as a char. Any help is appreciated.
char* readFile(unsigned long size, char *fileName) {
FILE *file = fopen(fileName, "r");
int c;
if(file != NULL)
{
while(c != EOF){ //calculate size of file
c = fgetc(file); //store character
size++;
}
char *x = (char *)malloc((size) * (sizeof(char))); // Size of x = 8 and I'm not sure why
rewind(file);
printf("\n");
int i = 0;
while(size - 1 > i){ //Reading the files contents to the allocated string
c = fgetc(file); //store character
if(c == EOF){
break;
}
x[i] = c;
i++;
}
fclose(file);
printf("Done Reading");
}
else
{
printf("\nError: Unable to open the file for Reading.\n");
}
rewind(file);
return 0;
}
I get a segmentation fault when I run
char* str = readFile(size, originalFile);
I would use stat to first get the size of your file
stat() retrieves information about the file pointed
to by pathname;
And then I made some tiny modifications to your function to make it work:
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
char* readFile(char *fileName) {
FILE *file;
struct stat st;
if (!(file = fopen(fileName, "r")))
return NULL;
stat(fileName, &st);
unsigned long size = st.st_size;
char *x;
if (!(x = (char *)malloc((size + 1) * (sizeof(char))))) // Size of x = 8 and I'm not sure why
return NULL;
unsigned long i = 0;
while (i < size) //Reading the files contents to the allocated string
x[i++] = getc(file);
x[i] = '\0';
fclose(file);
printf("Done Reading\n");
return x;
}
int main(void) {
char *fileName = "a.txt";
char *res = readFile(fileName);
printf("%s\n", res);
return 0;
}
Don't forget that in C strings are NULL terminated, you need to malloc size+1 to add the final '\0'.
This is (IMHO) an easier way to find the size of the file:
char *readFile(const char *fileName)
{
unsigned long size = 0;
char *x;
FILE *file = fopen(fileName, "r");
int c;
if (file != NULL)
{
fseek(file, 0, SEEK_END); /* SET the position at EOF */
size = ftell(file); /* Record the position at EOF to return size of file */
rewind(file); /* SET position back to Origin */
printf("size detected %ld\n", size); // reads correct size
x = (char *)malloc((size) * (sizeof(char)));
rewind(file);
printf("\n");
int i = 0;
while (size - 1 > i)
{ //Reading the files contents to the allocated string
c = fgetc(file); //store character
if (c == EOF)
{
break;
}
x[i] = c;
i++;
}
fclose(file);
printf("Done Reading\n");
}
else
{
printf("\nError: Unable to open the file for Reading.\n");
return NULL;
}
rewind(file);
return x; // * you need to return x not zero
}
Successfully reads the content of the file:
#include <stdio.h>
#include <stdlib.h>
int main()
{
char *data;
data = readFile("records.txt");
printf("%s\n", data);
return 0;
}

Function to insert text line by line from a file into string array passed by pointer

I'm trying to create a function read_lines that takes a file *fp, a pointer to char** lines, and pointer to int num_lines. The function should insert each line of text into lines, and increase num_lines to however many lines the file has.
Its probably really simple but I've been trying to insert the text for several hours now.
This is what main.c would look like. Everything but read_lines is already defined and working.
int main(int argc, char* argv[]){
char** lines = NULL;
int num_lines = 0;
FILE* fp = validate_input(argc, argv);
read_lines(fp, &lines, &num_lines);
print_lines(lines, num_lines);
free_lines(lines, num_lines);
fclose(fp);
return 0;
}
This is one of my attempts at trying to append lines, but I couldn't figure it out.
read_lines.c
void read_lines(FILE *fp, char ***lines, int *num_lines) {
int i;
int N = 0;
char s[200];
for (i=0; i<3; i++)
{
while(fgets(s, 200, fp)!=NULL){N++;}
char strings[50][200];
rewind(fp);
fgets(s, 200, fp);
strcpy(lines[i],s);
}
}
I'd appreciate any help at solving this, thanks.
A solution (without headers and error checking for readability):
void read_lines(FILE *stream, char ***lines_ptr, size_t *num_lines_ptr) {
char **lines = NULL;
size_t num_lines = 0;
char *line = NULL;
size_t len = 0;
ssize_t nread;
while ((nread = getline(&line, &len, stream)) != -1) {
lines = lines == NULL
? malloc(sizeof(char*))
: realloc(lines, (num_lines+1)*sizeof(char*));
lines[num_lines] = malloc(nread+1);
memcpy(lines[num_lines], line);
++num_lines;
}
free(line);
*lines_ptr = lines;
*num_lines_ptr = num_lines;
}
The full solution:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// lines_ptr: Output. Initial value ignored. To be freed by caller on success.
// num_lines_ptr: Output. Initial value ignored.
// Returns: 0 on error (errno set). 1 on success.
int read_lines(FILE *stream, char ***lines_ptr, size_t *num_lines_ptr) {
char ***lines = NULL;
size_t num_lines = 0;
char *line = NULL;
size_t len = 0;
ssize_t nread;
while ((nread = getline(&line, &len, stream)) != -1) {
char **new_lines = lines == NULL
? malloc(sizeof(char*))
: realloc(lines, (num_lines+1)*sizeof(char*));
if (new_lines == NULL)
goto error;
lines = new_lines;
lines[num_lines] = malloc(nread+1);
if (lines[num_lines] == NULL)
goto error;
memcpy(lines[num_lines], line);
++num_lines;
}
if (ferror(stream))
goto error;
free(line);
*lines_ptr = lines;
*num_lines_ptr = num_lines;
return 1;
error:
for (size_t i=num_lines; i--; )
free(lines[i]);
free(lines);
free(line);
*lines_ptr = NULL;
*num_lines_ptr = 0;
return 0;
}
(You could save three lines by using the ..._ptr vars instead of setting them at the end, but is that really worth the readability cost?)
I find fgets hard to use and more trouble than it's worth. Here is a fgetc and malloc-based approach:
void read_lines(FILE *fp, char ***lines, int *num_lines) {
int c;
size_t line = 0;
size_t pos = 0;
size_t len = 64;
*lines = malloc(1 * sizeof(char*));
(*lines)[0] = malloc(len);
while ((c = fgetc(fp)) != EOF) {
if (c == '\n') {
(*lines)[line][pos] = '\0';
line++;
pos = 0;
len = 64;
*lines = realloc(*lines, (line+1) * sizeof(char*));
} else {
(*lines)[line][pos] = c;
}
pos++;
if (pos >= len) {
len *= 2;
(*lines)[line] = realloc((*lines)[line], len);
}
}
*num_lines = line+1;
}
I haven't checked this, so correct me if I made any mistakes. Also, in real code you would do lots of error checking here that I have omitted.
assuming you have allocated enough memory to lines, following should work
if not you have to malloc/calloc() for lines[i] before doing strcpy() in every
iteration of the loop.
void read_lines(FILE *fp, char ***lines, int *num_lines) {
int N = 0;
char s[200];
while(fgets(s, 200, fp)!=NULL){
N++;
strcpy((*lines)[N],s);
}
*num_lines = N; // update pointer with value of N which is number of lines in file
}

How to determine the size of (all the content) a file so I can allocate memory for it at once?

I am trying to allocate memory for the content of a file with words(separated by: \n).
How do I replace the 16000 to make it usable with files of greater size?
My code:
typedef struct node {
bool is_word;
struct node* children[27];
} node;
node* root;
bool load(const char* dictionary)
{
FILE *fp;
fp = fopen(dictionary, "rb");
node* node_bucket = calloc(16000, sizeof(node));
node* next_free_node = node_bucket;
// compute...
// to later free the memory with another function
root = node_bucket;
}
Thanks
You can allocate memory dynamically without knowing how large the file is. I used a block size that is a power of 2, which is generally kinder towards block I/O. It wastes a little when the last block is only partially used, but here is an example, which you could adapt to work with your node structs:
#include <stdio.h>
#include <stdlib.h>
#define BLOCKSIZE 16384
int main(void) {
unsigned char *buf = NULL;
unsigned char *tmp = NULL;
size_t totalread = 0;
size_t currentsize = 0;
size_t currentread = 0;
FILE *fp;
if((fp = fopen("test.txt", "rb")) == NULL)
exit(1);
do {
currentsize += BLOCKSIZE;
if((tmp = realloc(buf, currentsize)) == NULL)
exit(1);
buf = tmp;
currentread = fread( &buf[totalread], 1, BLOCKSIZE, fp);
totalread += currentread;
} while (currentread == BLOCKSIZE);
printf("Total size was %zu\n", totalread);
free(buf);
fclose(fp);
return 0;
}
The simplest approach to get the size of a file is using ftell():
fseek(fp, 0, SEEK_END); // non-portable
long size = ftell(fp);
However, as the comment says, this is not portable, because N1570 documents in "7.21.9.2 The seek function":
2 ...... A binary stream need not meaningfully support fseek calls with a
whence value of SEEK_END.
Alternatively, you can write a function to get the size of a file on your own:
size_t fSize(FILE *fp)
{
void *ptr = malloc(1);
size_t size = 0;
while(fread(ptr, 1, 1, fp) == 1)
size++;
if(feof(fp))
return size;
else
return 0; // reading error
}
An accuracy-efficiency trade-off:
size_t fRoughSize(FILE *fp)
{
void *ptr = malloc(1024);
size_t size = 0;
while(fread(ptr, 1024, 1, fp) == 1024)
size += 1024;
if(feof(fp))
return size;
else
return 0; // reading error
}

Resources