How to load data from file into float* in C - c

I have an ASCII file in which the entries of a vector are stored. I do not know the length (number of rows) of the file, nor do I have an estimation about its size as it may strongly vary from a few lines to some tens of thousands. I need an efficient way to read the data stored in that file and load them to a float* variable. The code should be in C.
My question is how to allocate memory for the vector which I need to create given that I do not know its size beforehand? Can you please give an example?
Finally, what is in your opinion the most appropriate prototype for such a function? Should it be something like:
load_data(const char* filename, float* data, int* vector_size);
?
Update 1.: While doing some initial tests, I wrote the following code:
void create_random_matrix(float* matrix, const int nrows) {
matrix = (float *) malloc(sizeof (float) * nrows);
short i;
for (i = 0; i < nrows; i++) {
matrix[i] = 7.0f;
}
}
which should return an array with all its elements equal to 7.0f. Instead, when I call it from my main.c:
float *a;
create_random_matrix(a, 10);
printf("%f",a[0]);
it prints 0.0f. How is that possible?!
Update 2. Was it not for your help, the following (working) code wouldn't have been written:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
/* Size of the buffer handed to fgets(). A line longer than this arrives in
 * several pieces; only the first piece of each line is parsed. */
#define LINE_SIZE 256
/* Number of elements the vector grows by each time it fills up. */
#define ALLOCATION_CHUNK 50

/**
 * Read one floating-point value per line from a text file into a newly
 * allocated array.
 *
 * @param filename  path of the ASCII file to read
 * @param vector    out: receives a heap array holding the values; the caller
 *                  owns it and must free() it. Set to NULL on failure or
 *                  when the file contains no values.
 * @param length    out: receives the number of values stored in *vector
 * @return 0 on success, -1 if the file cannot be opened or memory runs out
 *
 * Lines that do not begin with a number (blank lines, for example) are
 * skipped instead of being stored as 0.
 */
int load_vector_data(const char* filename, float** vector, int* length) {
    *vector = NULL;
    *length = 0;

    FILE *fr = fopen(filename, "r");
    if (fr == NULL) {
        return -1;
    }

    float *data = NULL;
    int allocated_rows = 0;
    int count = 0;
    int at_line_start = 1;
    char line[LINE_SIZE];

    while (fgets(line, LINE_SIZE, fr) != NULL) {
        int starts_line = at_line_start;
        /* A piece without '\n' means the same line continues in the next
         * fgets() call; that continuation must not be parsed as a value. */
        at_line_start = (strchr(line, '\n') != NULL);
        if (!starts_line) {
            continue;
        }

        /* strtod() stops at the trailing newline by itself; `end` tells us
         * whether anything was converted at all. */
        char *end = NULL;
        float value = (float) strtod(line, &end);
        if (end == line) {
            continue;
        }

        if (count >= allocated_rows) {
            int new_rows = allocated_rows + ALLOCATION_CHUNK;
            /* Keep the old pointer until realloc() is known to have worked,
             * otherwise a failure would leak the values read so far. */
            float *grown = realloc(data, sizeof *grown * (size_t) new_rows);
            if (grown == NULL) {
                free(data);
                fclose(fr);
                return -1;
            }
            data = grown;
            allocated_rows = new_rows;
        }
        data[count++] = value;
    }
    fclose(fr);

    if (count > 0) {
        /* Trim the unused tail. If shrinking fails the larger block is
         * still valid, so it is simply kept. */
        float *fitted = realloc(data, sizeof *fitted * (size_t) count);
        if (fitted != NULL) {
            data = fitted;
        }
    }

    *vector = data;
    *length = count;
    return 0;
}

/**
 * Replace the first '\n' in str with a string terminator.
 *
 * @param str   NUL-terminated string, modified in place
 * @param size  capacity of the buffer str lives in; at most this many
 *              characters are examined
 *
 * The scan stops at the existing terminator, so bytes after the end of the
 * string are never read.
 */
void strip_newline(char *str, int size) {
    for (int i = 0; i < size && str[i] != '\0'; ++i) {
        if (str[i] == '\n') {
            str[i] = '\0';
            return;
        }
    }
}
I tried it with an 8000-lines file and seems to be working just fine! Please, feel free to comment.

fgets is your friend for reading the data from the file (if my assumption that each piece of data is on its own line is correct). Read the lines in one by one and call strtof on the text you read. Reading text and converting to floats is inherently a slow process so I reckon the above is perfectly good enough.
As for your second question there are a couple of ways to do it. You could pass a float** in and make the malloc inside the function. Though this has the disadvantage of you needing to free it outside the function which is not exactly obvious. The only other way, i can think of, would be to scan through the file and count the number of new-lines then pre-allocate the array length for that.
It's hard to say whether doing a malloc and a bunch of reallocs would be more efficient than scanning through the file to count the number of lines; it would probably be worth trying both methods (neither is particularly hard) and seeing which one is faster for you.

Related

Dynamic memory and pointers arguments

I have this two functions, meant to do the same thing - read every line from a file with only integers and store them on an array:
I call them like this on the main() function:
StoreinArray1(X, size, f);
StoreinArray2(X, size, f);
The First works but the Second doesn't.
First
int StoreinArray1(int X[], int *size, char *file)
{
int i=0;
FILE *f;
f = fopen(file, "r");
X = (int*) realloc (X, *size * sizeof(int));
for (i=0;i<*size;i++)
{
fscanf(f, "%d", &X[i]);
}
return 1;
}
Second
int StoreinArray2(int X[], int *size, char *file)
{
FILE *f;
f = fopen(file, "r");
if (f == NULL)
return -1; // failed opening
*size = 0;
while (!feof(f))
{
if (fscanf(f, "%d", &X[*size]) == 1)
*size = *size + 1;
}
fclose(f);
return 1;
}
For the First I used dynamic memory allocation and actually calculated size:
X = malloc(0);
while ((ch = fgetc(f)) != EOF)
{
if (ch == '\n')
lines++;
}
size = &lines;
For the Second I can't do the same. Visual Studio Code crashes when I try.
So I tried to do *size = 0 and then StoreinArray2(X, size, f); but it didn't work either.
So my question is about the second function:
Is it calculating the size while it is scanning the file? Supposedly it isn't necessary to use dynamic memory allocation (my teacher said).
If so then how can I pass some "size" argument correctly? As a pointer or just a simple integer?
Thank you in advance!
Edit:
Here is the full First program:
#include <stdio.h>
#include <stdlib.h>
int main()
{
FILE *f;
int *size=0, *X, lines=1;
char *file = {"file.txt"};
char ch;
X = malloc(0);
f = fopen(file, "r");
while ((ch = fgetc(f)) != EOF)
{
if (ch == '\n')
lines++;
}
size = &lines;
StoreinArray(X, size, file);
}
int StoreinArray(int X[], int *size, char *file)
{
int i=0;
FILE *f;
f = fopen(file, "r");
X = (int*) realloc (X, *size * sizeof(int));
for (i=0;i<*size;i++)
{
fscanf(f, "%d", &X[i]);
}
for (i=0;i<*size;i++)
printf("%d\n",X[i]);
return 1;
}
And the Second:
int main()
{
int X[100];
int *size;
char *file = {"file.txt"};
*size = 0;
StoreinArray(X, size, file);
}
int StoreinArray(int X[], int *size, char *file)
{
FILE *f;
f = fopen(file, "r");
if (f == NULL)
return -1;
*size = 0;
while (!feof(f))
{
if (fscanf(f, "%d", &X[*size]) == 1)
*size = *size + 1;
}
fclose(f);
return 1;
}
In first I had to open the file in main to count the number of lines. I know I forgot fclose(f) and free(X) in main, but with those instructions VSC crashes.
int StoreinArray (int X[], int *size, char *file)
{
FILE *f;
int i=0;
f = fopen(file, "r");
if (f == NULL)
return -1;
*size = 0;
while (!feof(f))
{
if (fscanf(f, "%d", &X[*size]) == 1)
{
*size = *size + 1;
X = (int *) realloc (X , *size * sizeof(int));
}
}
fclose(f);
return 1;
}
int main()
{
int *X, size=0;
char *file = {"f.txt"};
X=malloc(0);
StoreinArray(X, &size, file);
free(X);
}
The problem with the second version of your program is the declaration of size in main. Declare it as an int, not a pointer to an int. Your current program is crashing because you didn't allocate any space for size, and when StoreInArray tried to update it, you got an access violation. So, main should look like this:
int main()
{
int X[100];
int size;
char *file = {"file.txt"};
size = 0;
StoreinArray(X, &size, file);
}
OK, I'll try to be thorough about it and explain all the things I can find.
First of all, we need to talk about variables, pointers and memory, because it seems that you don't have a very firm grasp of these concepts. Once that clicks, the rest should follow easily.
First up, simple variables. That part is easy, I think you more or less understand that.
int x; // This is an integer variable. It's not initialized, so its value could be anything
int meaning = 42; // This is another integer variable. Its value will be 42.
double pi = 3.14; // A variable with digits after the decimal point
char c = 15; // Another inte... well, yes, actually, char is also an integer.
char c2 = 'a'; // Nice, this also counts. It's converted to an integer behind the scenes.
Etc.
Similarly with arrays:
int arr[10]; // Array with 10 values. Uninitialized, so they contain garbage.
int arr2[3] = { 1, 2, 3 }; // Array with 3 values, all initialized
int arr3[] = {1, 2, 3, 4, 5}; // Array with 5 values.
Arrays are basically just a bunch of variables created at once. When you make an array, C needs to know the size, and the size must be a fixed number - you can't use another variable. There's a reason for this, but it's technical and I won't go into that.
Now about memory. Each of these variables will be stored somewhere in your computer's RAM. The precise location is unpredictable and can vary each time you run your program.
Now, RAM is like a huuuge array of bytes. There's byte number 0, byte number 1, etc. An int variable takes up 4 bytes, so it could, for example, end up in bytes number 120, 121, 122 and 123.
All the bytes in a single variable (or in a single array) will be next to each other in RAM. Two different variables could end up in the opposite ends of your RAM, but the bytes in each of those variables will be together.
Now we come to the concept of a pointer. A pointer is basically just an integer variable. It contains the RAM-number of the first byte of some other variable. Let's look at an example:
int i = 42;
int *p = &i;
Suppose that the variable i got stored in the bytes number 200...203 (that's 4 bytes). In those bytes we have the value 42. Then let's suppose that the variable p got stored in the bytes number 300...303 (that's another 4 bytes). Well, these 4 bytes will contain the value 200, because that's the first byte of the i variable.
This is also what programmers mean when they say "(memory) address of <variable>" or "a pointer to <variable>". It's the number of the first byte in RAM of <variable>. Since all bytes of the same variable stick together, then by knowing the first byte (and knowing the type of the variable), you can figure out where the rest of <variable> is in memory.
Now let's add one more line in our example:
*p = 5;
What the computer does in this case is it takes the address which was stored in p, goes to that location in memory, treats the following 4 bytes as an integer, and puts the value 5 there. Since we had previously set p to "point" at the address of i, this has the same effect as simply setting the i variable itself.
Ok, did you get all of this? This is a bit tricky and usually takes a while to wrap your head around it. Feel free to re-read it as many times as necessary to understand it. You'll need it to move on.
Ready? Ok, let's talk a bit about stack and dynamic memory.
When the program starts, the OS automatically allocates a bit of memory for it, just to make it easier for it to start. It's like a big array of bytes, all together in memory. Nowadays it's usually about 1MB, but it can vary. This memory is called "The Stack". Why is it called that? Eh, I'll explain some other time.
Anyways, When your main() function starts, the OS goes like "Here you go my good fellow, a pointer to The Stack. It's all yours to use as you see fit! Have a nice day!"
And your main() function then uses it to store all the variables you make in it. So when you go like p = &i; then the address you get stored in p is somewhere within The Stack.
Now when main() calls another function, such as StoreinArray(), it also gives it a pointer to the stack and says "OK, here's a pointer to the stack. Careful, I've already used the first XXX bytes of it, but feel free to use the rest".
And then StoreinArray() uses the stack to put its variables there. And when StoreinArray() calls something else, it does the same, and on and on.
Now, there are a few things to note here:
This scheme is nice, because nobody needs to "allocate" or "deallocate" any memory. It's easier, it's faster.
However, when a function returns, all its variables are considered to be gone and that memory is fair game to anyone else later wanting it. So be careful about pointers pointing to it. They will only be valid while the function is running. When it returns - well, the pointer will still "work", but who knows when something will overwrite that memory... And if you write to that memory, who can tell what you will mess up? Many subtle bugs have been created this way.
The stack is fairly limited and can be exhausted. When all bytes in it are used up, your program WILL crash. The typical way this happens is either when you try to create a very large array, or you go into some sort of infinite loop where the function keeps calling itself over and over again. Try that. :)
So, for these cases you use "dynamic" memory. In C that mostly means malloc(). You tell malloc() how many bytes of memory you need, and malloc() finds a large enough unclaimed space in the RAM, marks it as used, and gives you a pointer to it. Well, that's a simplified view of things anyway.
The same approach works when you don't know beforehand how much memory you will need.
The downside is that you need to free() the memory when you're done with it, or you may run out of available memory, and then malloc() will fail. Be also aware that after you've freed the memory, all pointers to it should be considered invalid, because you're not the owner of that particular piece of memory anymore. Anything can happen if you keep messing with it.
Phew, that's a lot. OK, I need a break. I'll come back and analyze your programs a bit later. But if you've understood all of this, you should now be able to spot the mistakes in your programs. Try going through them, line by line, and narrating to yourself what each line does.
Many, many hours later:
OK, so let's take a look at your programs. The first one:
#include <stdio.h>
#include <stdlib.h>
int main()
{
FILE *f;
int *size=0, *X, lines=1;
char *file = {"file.txt"};
char ch;
X = malloc(0);
f = fopen(file, "r");
while ((ch = fgetc(f)) != EOF)
{
if (ch == '\n')
lines++;
}
size = &lines;
StoreinArray(X, size, file);
}
int StoreinArray(int X[], int *size, char *file)
{
int i=0;
FILE *f;
f = fopen(file, "r");
X = (int*) realloc (X, *size * sizeof(int));
for (i=0;i<*size;i++)
{
fscanf(f, "%d", &X[i]);
}
for (i=0;i<*size;i++)
printf("%d\n",X[i]);
return 1;
}
There are two things that can be improved here. First - the size and lines variables. There's no need for both of them. Especially since you set size to point to lines anyway. Just keep lines and all will be fine. When you pass it to StoreinArray(), pass it as a simple integer. There's no need for a pointer.
Second, the X array. You're doing something odd with it and it seems like you're fumbling in the dark. There's no need for the malloc(0) and then later realloc(X, *size*sizeof(int). Keep it simple - first count the lines, then allocate the memory (as much as needed). Also, keep the memory allocation in the main() method and just pass the final X to the StoreinArray. This way you can avoid another subtle bug - when inside the StoreinArray() function you execute the line X = (int*) realloc (X, *size * sizeof(int)); the value of X changes only inside the StoreinArray() function. When the function returns, the variable X in the main() function will still have its old value. You probably tried to work around this with the reallocate() dance, but that's not how it works. Even worse - after the realloc(), whatever value the X used to be, isn't a valid pointer anymore, because realloc() freed that old memory! If you had later tried to do anything with the X variable in the main() function, your program would have crashed.
Let's see how your program would look with the changes I proposed (plus a few more small cosmetic tweaks):
#include <stdio.h>
#include <stdlib.h>
/* Declared up front so the call in main() is checked against the real
 * signature instead of relying on an implicit declaration. */
void StoreinArray(int X[], int lines, char *file);

/*
 * Counts the lines of "file.txt", allocates one int per line and hands the
 * array to StoreinArray() to be filled and printed.
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main()
{
    char *file = "file.txt";
    FILE *f = fopen(file, "r");
    int *X, lines=1;
    int ch; /* int, not char: fgetc() returns EOF as an out-of-band int value */
    if (f == NULL)
    {
        perror(file);
        return 1;
    }
    /* lines starts at 1 so a last line without a trailing '\n' is counted */
    while ((ch = fgetc(f)) != EOF)
    {
        if (ch == '\n')
            lines++;
    }
    fclose(f);
    X = malloc(lines * sizeof *X);
    if (X == NULL)
    {
        perror("malloc");
        return 1;
    }
    StoreinArray(X, lines, file);
    free(X);
    return 0;
}
/*
 * Reads up to `lines` integers from the text file `file` into X and prints
 * each value that was read on its own line.
 *
 * X     - caller-provided array with room for at least `lines` ints
 * lines - maximum number of integers to read
 * file  - path of the file to read
 *
 * If the file cannot be opened nothing is read or printed. Reading stops at
 * the first token that is not an integer or at end of file, and only the
 * values actually read are printed, so slots that were never filled are
 * never shown.
 */
void StoreinArray(int X[], int lines, char *file)
{
    int count = 0;
    FILE *f = fopen(file, "r");
    if (f == NULL)
        return;
    while (count < lines && fscanf(f, "%d", &X[count]) == 1)
        count++;
    fclose(f);
    for (int i = 0; i < count; i++)
        printf("%d\n",X[i]);
}
OK, now the second program.
int main()
{
int X[100];
int *size;
char *file = {"file.txt"};
*size = 0;
StoreinArray(X, size, file);
}
int StoreinArray(int X[], int *size, char *file)
{
FILE *f;
f = fopen(file, "r");
if (f == NULL)
return -1;
*size = 0;
while (!feof(f))
{
if (fscanf(f, "%d", &X[*size]) == 1)
*size++;
}
fclose(f);
return 1;
}
Right off the bat, the size variable will crash your program. It's a pointer that isn't initialized so it points to some random place in memory. When a little lower you try to write to the memory it points to (*size = 0), that will crash, because most likely you won't own that memory. Again, you really don't need a pointer here. In fact, you don't need the variable at all. If you need to know in the main program how many integers the StoreinArray() read, you can simply have it return it.
There's another subtle problem - since the size of the X array is fixed, you cannot afford to read more than 100 integers from the file. If you do, you will go outside the array and your program will crash. Or worse - it won't crash, but you'll be overwriting the memory that belongs to some other variable. Weird things will happen. C is lenient, it doesn't check if you're going outside the allowed bounds - but if you do, all bets are off. I've spent many hours trying to find the cause of a program behaving weirdly, only to find out that some other code in some completely unrelated place had gone outside its array and wreaked havoc upon my variables. This is VERY difficult to debug. Be VERY, VERY careful with loops and arrays in C.
In fact, this kind of bug - going outside an array - has its own name: a "Buffer Overrun". It's a very common security exploit too. Many security vulnerabilities in large, popular programs are exactly this problem.
So, the best practice would be to tell StoreinArray() that it can store at most 100 integers in the X array. Let's do that:
#include <stdio.h>
#include <stdlib.h>
#define MAX_X 100
/* Declared up front so the call in main() is checked against the real
 * signature instead of relying on an implicit declaration. */
int StoreinArray(int X[], int maxLines, char *file);

/*
 * Reads at most MAX_X integers from "file.txt" into a fixed-size array.
 * Returns 0 on success, 1 if StoreinArray() reports that the file could not
 * be opened.
 */
int main()
{
    int X[MAX_X];
    char *file = "file.txt";
    int lines;
    lines = StoreinArray(X, MAX_X, file);
    if (lines < 0)
    {
        /* StoreinArray() returns -1 when fopen() fails */
        fprintf(stderr, "Could not open %s\n", file);
        return 1;
    }
    return 0;
}
/*
 * Reads integers from the text file `file` into X, storing at most maxLines
 * of them.
 *
 * X        - caller-provided array with room for at least maxLines ints
 * maxLines - capacity of X; nothing is written when it is 0 or negative
 * file     - path of the file to read
 *
 * Returns the number of integers stored, or -1 if the file cannot be opened.
 * Reading stops at end of file, at the first token that is not an integer,
 * or once maxLines values have been stored.
 */
int StoreinArray(int X[], int maxLines, char *file)
{
    FILE *f;
    int lines = 0; /* must start at 0: it is both the count and the next index */
    f = fopen(file, "r");
    if (f == NULL)
        return -1;
    /* Loop on fscanf()'s result rather than feof(): a non-numeric token makes
     * fscanf() return 0 without consuming input, which would otherwise spin
     * forever. */
    while (lines < maxLines && fscanf(f, "%d", &X[lines]) == 1)
        lines++;
    fclose(f);
    return lines;
}
So, there you are. This should work. Any more questions? :)

Partition a 1D char* into 2D char**

There are a lot of questions about converting a 2D array into a 1D array, but I am attempting just the opposite. I'm trying to partition a string into substrings of constant length and house them in a 2D array. Each row of this 2D matrix should contain a substring of the initial string, and, if each row were to be read in succession and concatenated, the initial string should be reproduced.
I nearly have it working, but for some reason I am losing the first substring (partitions[0] -- length 8*blockSize) of the initial string (bin):
int main (void){
char* bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
int blockSize = 2; // block size in bytes
int numBlocks = strlen(bin)/(8*blockSize); // number of block to analyze
char** partitions = (char**)malloc((numBlocks+1)*sizeof(char)); // break text into block
for(int i = 0; i<numBlocks;++i){
partitions[i] = (char*)malloc((8*blockSize+1)*sizeof(char));
memcpy(partitions[i],&bin[8*i*blockSize],8*blockSize);
partitions[i][8*blockSize] = '\0';
printf("Printing partitions[%d]: %s\n", i, partitions[i]);
}
for(int j=0; j<numBlocks;++j)
printf("Printing partitions[%d]: %s\n", j,partitions[j]);
return 0;
}
The output is as follows:
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
Printing partitions[0]: Hj
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
The construction of partitions in the first for loop is successful. After construction at read out, the string at partitions[0] contains garbage values. Can anyone offer some insight?
int numBlocks = strlen(bin)/(8*blockSize); // number of block to analyze
char** partitions = (char**)malloc((numBlocks+1)*sizeof(char)); // break text into block
for(int i = 0; i<numBlocks;++i){
partitions[i] = (char*)malloc((8*blockSize+1)*sizeof(char));
memcpy(partitions[i],&bin[8*i*blockSize],8*blockSize);
partitions[i][8*blockSize] = '\0';
printf("Printing partitions[%d]: %s\n", i, partitions[i]);
}
This all looks suspicious to me; it's far too complex for the task, making it a prime suspect for errors.
For reasons explained in answers to this question, void * pointers which are returned by malloc and other functions shouldn't be cast.
There's no need to multiply by 1 (sizeof (char) is always 1 in C). In fact, in your first call to malloc you should be multiplying by sizeof (char *) (or better yet, sizeof *partitions, as in the example below), since that's the size of the type of element that partitions points at.
malloc might return NULL, resulting in undefined behaviour when you attempt to assign into the location it points at.
Anything else (i.e. everything that isn't NULL) that malloc, calloc or realloc returns will need to be freed when no longer in use, or else tools such as valgrind (a leak detection program, useful for people who habitually forget to free allocated objects and thus cause memory leaks) will report false positives and lose part of their usefulness.
numBlocks, i, or anything else that's for counting elements of an array, should be declared as a size_t to follow standard convention (e.g. check the strlen manual, synopsis section to see how strlen is declared, noting the type of the return value is size_t). Negative values caused by overflows here will obviously cause the program to misbehave.
I gather you've yet to think about any excess beyond the last group of 8 characters... This shouldn't be difficult to incorporate.
I suggest using a single allocation, such as:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BLOCK_SIZE 8
int main(void) {
char const *bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
size_t bin_length = strlen(bin),
block_count = (bin_length / BLOCK_SIZE)
+ (bin_length % BLOCK_SIZE > 0); // excess as per point 6 above
char (*block)[BLOCK_SIZE + 1] = malloc(block_count * sizeof *block);
if (!block) { exit(EXIT_FAILURE); }
for (size_t x = 0; x < block_count; x++) {
snprintf(block[x], BLOCK_SIZE + 1, "%s", bin + x * BLOCK_SIZE);
printf("Printing partitions[%zu]: %s\n", x, block[x]);
}
for (size_t x = 0; x < block_count; x++) {
printf("Printing partitions[%zu]: %s\n", x, block[x]);
}
free(block);
exit(0);
}
There are a few problems with your code.
You are allocating **partitions incorrectly.
Instead of:
char** partitions = (char**)malloc((numBlocks+1)*sizeof(char)); /* dont need +1, as numblocks is enough space. */
You need to allocate space for char* pointers, not char characters.
instead, this needs to be:
char** partitions = malloc((numBlocks+1)*sizeof(char*));
Also read Why not to cast result of malloc(), as it is not needed in C.
malloc() needs to be checked everytime, as it can return NULL when unsuccessful.
Once finished with the space allocated, it is always good to free() memory previously requested by malloc(). It is important to do this at some point in the program.
Here is some code which shows this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BLOCKSIZE 2
#define BLOCK_MULTIPLIER 8
int main(void) {
const char *bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
const size_t blocksize = BLOCKSIZE;
const size_t multiplier = BLOCK_MULTIPLIER;
const size_t numblocks = strlen(bin)/(multiplier * blocksize);
const size_t numbytes = multiplier * blocksize;
char **partitions = malloc(numblocks * sizeof(*partitions));
if (partitions == NULL) {
printf("Cannot allocate %zu spaces\n", numblocks);
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < numblocks; i++) {
partitions[i] = malloc(numbytes+1);
if (partitions[i] == NULL) {
printf("Cannot allocate %zu bytes for pointer\n", numbytes+1);
exit(EXIT_FAILURE);
}
memcpy(partitions[i], &bin[numbytes * i], numbytes);
partitions[i][numbytes] = '\0';
printf("Printing partitions[%zu]: %s\n", i, partitions[i]);
}
printf("\n");
for(size_t j = 0; j < numblocks; j++) {
printf("Printing partitions[%zu]: %s\n", j,partitions[j]);
free(partitions[j]);
partitions[j] = NULL;
}
free(partitions);
partitions = NULL;
return 0;
}
Which outputs non-garbage values:
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100

how to malloc memory for string pointers array, and other problems

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LINE_SIZE 100
#define INITIAL_BUFFER_SIZE 16
int main() {
char **line_buffer;
line_buffer = (char **) malloc(INITIAL_BUFFER_SIZE);
int i = 0;
int lines = 0;
// the size of buffer
int buffer_size = 1;
int *buffer_size_p = &buffer_size;
char *line_one = (char *) malloc(MAX_LINE_SIZE);
//read lines from a file
while (gets(line_one)) {
line_buffer[lines] = (char *) malloc(MAX_LINE_SIZE);
strcpy(line_buffer[lines], line_one);
lines++;
line_one = (char *) malloc(MAX_LINE_SIZE);
// if too much lines double the buffer size
if (lines == *buffer_size_p) {
buffer_size *= 2;
line_buffer = IncreaseBuffer(line_buffer, buffer_size_p);
}
}
PrintLines(line_buffer, lines);
// sorting all the line by strcmp
for (i = 0; i < lines; i++) {
printf("%s", line_buffer[i]);
// int min = MinLineIndex(line_buffer, i, lines);
// SwapLines(line_buffer, i, min);
}
PrintLines(line_buffer, lines);
// free(line_buffer);
return 0;
}
First, ignore the gets() function, this is required for this.
First, I used a for loop to do (char *) line_buffer[lines] = (char *) malloc(MAX_LINE_SIZE);
It does not work;
I did this way, it worked, However, after read several lines from a file, the first line become something like "��R", and it is changing every time.
And, I cannot use free(line_buffer); as well.
Self studying. Please help.
A big problem is that you only allocate 16 bytes for line_buffer, which on a 64-bit system would mean you only allocate space for two pointers.
If you read more than two (or four on 32-bit systems) lines you will go out of bounds.
If you want to allocate space for 16 pointers, then you need to allocate space for INITIAL_BUFFER_SIZE * sizeof(char *) bytes, or better yet, INITIAL_BUFFER_SIZE * sizeof(*line_buffer).
Then there's the issue of memory leaks, of which you have quite a few. First of all you don't need to allocate memory for the temporary buffer line_one, declare it as a normal fixed-size array. That will get rid of quite a few leaks as you reallocate it in the loop without freeing the old memory.
Then to free the line_buffer memory you actually need to free each individual entry first, before you call free on line_buffer.
Remember: For each malloc you need a corresponding free.
And instead of allocating and copying each line explicitly, while it's not a standard C function just about all libraries have a strdup function which does it for you, so you can do e.g.
line_buffer[lines] = strdup(line_one);
And the buffer_size_p variable is not needed, if you need to use a pointer to the buffer_size variable, just use the address-of operator directly when needed, like in
line_buffer = IncreaseBuffer(line_buffer, &buffer_size);
From what you've posted, your use (or rather, incorrect use) of malloc may be the culprit.
For line buffer, you are only allocating 16 bytes, when really you should have been doing this:
line_buffer = (char **) malloc(INITIAL_BUFFER_SIZE*sizeof(char*));
Also, I would also encourage the use of sizeof in your other malloc call:
line_buffer[lines] = (char *) malloc(MAX_LINE_SIZE*sizeof(char))
The next crucial thing is:
int buffer_size = 1;
should be:
int buffer_size = INITIAL_BUFFER_SIZE;
After, all, you just allocated INITIAL_BUFFER_SIZE pointers to lines.
(Posted on behalf of the OP).
First, ignore the gets() function, this is required for this:
(char **) malloc(INITIAL_BUFFER_SIZE * sizeof(char *))
Almost all the problems are because I don't know I need to use * sizeof(char *).
Here is my final code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LINE_SIZE 100
#define INITIAL_BUFFER_SIZE 16
int main() {
char **line_buffer;
line_buffer = (char **) malloc(INITIAL_BUFFER_SIZE * sizeof(char *));
int i = 0;
int lines = 0;
for (i = 0; i<INITIAL_BUFFER_SIZE; i++){
line_buffer[i] = (char *) malloc(MAX_LINE_SIZE * sizeof(char));
}
// the size of buffer
int buffer_size = INITIAL_BUFFER_SIZE;
while (gets(line_buffer[lines++])) {
if (lines == buffer_size) {
buffer_size *= 2;
line_buffer = IncreaseBuffer(line_buffer, &buffer_size);
}
}
// sorting all the line by strcmp
for (i = 0; i < lines; i++) {
int min = MinLineIndex(line_buffer, i, lines - 2);
SwapLines(line_buffer, i, min);
}
PrintLines(line_buffer, lines - 1);
for (i = 0; i < buffer_size ; i++) {
free(line_buffer[i]);
}
free(line_buffer);
return 0;
}

Create Dynamic Array in c

I am trying to create a dynamic array of size 32, and then read integers from some file, and store them in the array. When the array gets filled up, then double its size (create another of twice the size, copy elements to it from old array and free old array) till the input file is exhausted.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define limit 32
int main (int argc, char *argv[]) {
FILE *file = fopen ("myFile1.txt", "r");
int i = 0, num;
int *B, *C;
B = malloc (sizeof (int) * limit);
while (fscanf (file, "%d", &num) > 0) {
if (i < limit) {
B[i] = num;
i++;
}
else {
C = malloc (sizeof (int) * 2 * limit);
memcpy (C, B, limit * sizeof (int));
free (B);
B = C;
B[i] = num;
i++;
limit = limt * 2;
i++;
}
}
return 0;
}
I am getting an error like: "lvalue required as left operand of assignment" and 2nd: "segmentation fault".
Since, I am trying to explore new possibilities related to dynamic arrays, to increase my knowledge; help me out by modifying the code.
Any help will be highly appreciated.
You can actually allocate more memory for your array using realloc() :
void *realloc(void *ptr, size_t size)
Instead of doing that :
{
C=malloc(sizeof(int)*2*limit);
memcpy(C,B,limit*sizeof(int));
free(B);
B=C;
B[i]=num;
i++;
limit=limt*2;
i++;
}
You can simply do :
B = realloc(B,new_size_in_bytes);
Talking about your code:
The preprocessor command #define will replace every occurrence of the word "limit" with the value associated to it (32, in this case) before the compilation. So you can't really change the value of a macro during run-time. If you wish to do that my advice would be not to define limit but use a variable instead.
About the segfault I'm not having one. Be sure to have the file called "myFile1.txt" in the same folder where the .c file is, also check if you misspelled it.

Copying a file line by line into a char array with strncpy

So i am trying to read a text file line by line and save each line into a char array.
From my printout in the loop I can tell it is counting the lines and the number of characters per line properly but I am having problems with strncpy. When I try to print the data array it only displays 2 strange characters. I have never worked with strncpy so I feel my issue may have something to do with null-termination.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads the whole file into one heap buffer, then scans it byte by byte and,
 * for every '\n' found, copies the preceding line into its own malloc'd
 * buffer stored in data[counter], printing the line index and its length.
 */
int main(int argc, char* argv[])
{
/* Open in binary mode and obtain the size by seeking to the end. */
FILE *f = fopen("/home/tgarvin/yes", "rb");
fseek(f, 0, SEEK_END);
long pos = ftell(f);
fseek(f, 0, SEEK_SET);
/* Load all pos bytes into memory; no terminator is appended to the buffer. */
char *bytes = malloc(pos); fread(bytes, pos, 1, f);
int i = 0;
int counter = 0; /* number of lines stored so far */
/* NOTE(review): counter is 0 at this point, so the array is declared with zero elements. */
char* data[counter];
int length;
/* NOTE(review): data is an array of char*, not a string; len is not used again in this function. */
int len=strlen(data);
int start = 0; /* offset of the first byte of the current line */
int end = 0; /* offset of the '\n' that ends the current line */
for(; i<pos; i++)
{
if(*(bytes+i)=='\n'){
end = i;
length=end-start; /* line length, excluding the '\n' */
/* Room for the line plus one extra byte. */
data[counter]=(char*)malloc(sizeof(char)*(length)+1);
/* NOTE(review): copies exactly length bytes; nothing here writes a '\0' into the extra byte. */
strncpy(data[counter], bytes+start, length);
printf("%d\n", counter);
printf("%d\n", length);
start=end+1; /* next line begins right after the '\n' */
counter=counter+1;
}
}
/* NOTE(review): %s receives the array of pointers itself, not one of the stored lines. */
printf("%s\n", data);
return 0;
}
Your "data[]" array is declared as an array of pointers to characters of size 0. When you assign pointers to it there is no space for them. This could cause no end of trouble.
The simplest fix would be to make a pass over the array to determine the number of lines and then do something like "char **data = malloc(number_of_lines * sizeof(char *))". Then doing assignments of "data[counter]" will work.
You're right that strncpy() is a problem -- it won't '\0' terminate the string if it copies the maximum number of bytes. After the strncpy() add "data[counter][length ] = '\0';"
The printf() at the end is wrong. To print all the lines use "for (i = 0; i < counter; i++) printf("%s\n", data[i]);"
Several instances of bad juju, the most pertinent one being:
int counter = 0;
char* data[counter];
You've just declared data as a variable-length array with zero elements. Despite their name, VLAs are not truly variable; you cannot change the length of the array after allocating it. So when you execute the lines
data[counter]=(char*)malloc(sizeof(char)*(length)+1);
strncpy(data[counter], bytes+start, length);
data[counter] is referring to memory you don't own, so you're invoking undefined behavior.
Since you don't know how many lines you're reading from the file beforehand, you need to create a structure that can be extended dynamically. Here's an example:
/**
 * Create the initial data array (an array of pointers to char).
 *
 * Reserves room for initialSize pointers; the elements themselves are
 * left uninitialized. The result of malloc is handed back unchanged,
 * so the caller must check it for NULL.
 */
char **dataAlloc(size_t initialSize)
{
    return malloc(initialSize * sizeof(char *));
}
/**
 * Extend data array; each extension doubles the length of the array.
 *
 * @param data   address of the array pointer; updated on success
 * @param length address of the current element count; updated on success
 * @return 1 if the extension succeeds; 0 if not, in which case the
 *         values of *data and *length are unchanged.
 *
 * The function fails without calling realloc when either argument is
 * NULL, when *length is 0, or when the doubled size in bytes would not
 * fit in a size_t.
 */
int dataExtend(char ***data, size_t *length)
{
    if (data == NULL || length == NULL) {
        return 0;
    }

    /* Largest element count whose doubled byte size still fits in size_t. */
    const size_t maxLength = (size_t)-1 / sizeof **data / 2;

    /*
     * Doubling zero would request zero bytes; realloc(ptr, 0) may free the
     * block and return NULL, which would leave *data dangling.
     */
    if (*length == 0 || *length > maxLength) {
        return 0;
    }

    size_t newLength = 2 * *length;

    /* Use a temporary so the original array survives a failed realloc. */
    char **tmp = realloc(*data, newLength * sizeof *tmp);
    if (tmp == NULL) {
        return 0;
    }

    *data = tmp;
    *length = newLength;
    return 1;
}
Then in your main program, you would declare data as
char **data;
with a separate variable to track the size:
size_t dataLength = SOME_INITIAL_SIZE_GREATER_THAN_0;
You would allocate the array as
data = dataAlloc(dataLength);
initially. Then in your loop, you would compare your counter against the current array size and extend the array when they compare equal, like so:
if (counter == dataLength)
{
if (!dataExtend(&data, &dataLength))
{
/* Could not extend data array; treat as a fatal error */
fprintf(stderr, "Could not extend data array; exiting\n");
exit(EXIT_FAILURE);
}
}
data[counter] = malloc(sizeof *data[counter] * length + 1);
if (data[counter])
{
strncpy(data[counter], bytes+start, length);
data[counter][length] = 0; // add the 0 terminator
}
else
{
/* malloc failed; treat as a fatal error */
fprintf(stderr, "Could not allocate memory for string; exiting\n");
exit(EXIT_FAILURE);
}
counter++;
You are trying to print data with the format specifier %s, while your data is an array of pointers to char.
Now talking about copying a string with giving size:
As far as I like it, I would suggest you to use
strlcpy() instead of strncpy()
size_t strlcpy( char *dst, const char *src, size_t siz);
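as strncpy won't terminate the string with a NUL character when the source is at least as long as the given size;
strlcpy() solves this issue.
Strings copied by strlcpy are always NUL-terminated (provided siz is greater than 0). Note that strlcpy is a BSD extension and not part of the ISO C standard.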
Allocate proper memory to the variable data[counter]. In your case counter is set to 0. Hence it will give segmentation fault if you try to access data[1] etc.
Declaring a variable like data[counter] is a bad practice. Even if counter changes in the subsequent flow of the program it wont be useful to allocate memory to the array data.
Hence use a double char pointer as stated above.
You can use your existing loop to find the number of lines first.
The last printf is wrong. You will be printing just the first line with it.
Iterate over the loop once you fix the above issue.
Change
int counter = 0;
char* data[counter];
...
int len=strlen(data);
...
for(; i<pos; i++)
...
strncpy(data[counter], bytes+start, length);
...
to
int counter = 0;
#define MAX_DATA_LINES 1024
char* data[MAX_DATA_LINES]; //1
...
for(; i<pos && counter < MAX_DATA_LINES ; i++) //2
...
strncpy(data[counter], bytes+start, length);
...
//1: This reserves valid storage for the pointers to the lines (data[0] to data[MAX_DATA_LINES - 1]). Without it you may hit a 'segmentation fault' error; if you do not, you are merely lucky.
//2: This ensures that if the file contains more than MAX_DATA_LINES lines, you do not run into a 'segmentation fault' error, because data[MAX_DATA_LINES] and beyond are not valid storage for line pointers.
I think that this might be a quicker implementation as you won't have to copy the contents of all the strings from the bytes array to a secondary array. You will of course lose your '\n' characters though.
It also handles files that do not end with a newline character. Because pos is declared as long, the index used for bytes[] and the length variable should be long as well.
#include <stdio.h>
#include <stdlib.h>
#define DEFAULT_LINE_ARRAY_DIM 100
/*
 * Reads "test.c" into a single heap buffer and splits it into lines in
 * place: every '\n' is overwritten with '\0' and data[] receives a pointer
 * to the start of each line, so no line is copied a second time.
 *
 * For each line found, the running line count and the length of that line
 * are printed; afterwards every line is printed.
 *
 * Returns 0 on success and 1 if the file cannot be opened, measured or
 * read, or if memory cannot be allocated. An empty file yields no output.
 */
int main(int argc, char* argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    int rc = 1;
    long pos = -1;
    long i;
    long length = 0;
    size_t counter = 0;
    size_t k;
    size_t size = DEFAULT_LINE_ARRAY_DIM;
    char *bytes = NULL;
    char **data = NULL;
    char **tmp;

    FILE *f = fopen("test.c", "rb");
    if (f == NULL) {
        perror("test.c");
        return 1;
    }

    /* Determine the file size by seeking to the end and back. */
    if (fseek(f, 0, SEEK_END) == 0) {
        pos = ftell(f);
    }
    if (pos < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Couldn't determine the file size!\n");
        goto done;
    }

    /* One extra byte in case the file isn't '\n' terminated. */
    bytes = malloc((size_t)pos + 1);
    data = malloc(size * sizeof *data);
    if (bytes == NULL || data == NULL) {
        fprintf(stderr, "Couldn't allocate enough memory!\n");
        goto done;
    }

    if (pos > 0 && fread(bytes, (size_t)pos, 1, f) != 1) {
        fprintf(stderr, "Couldn't read the file!\n");
        goto done;
    }

    /* Guarantee that the last line ends with '\n' so the loop sees it. */
    if (pos > 0 && bytes[pos - 1] != '\n') {
        bytes[pos++] = '\n';
    }

    data[0] = bytes;
    for (i = 0; i < pos; i++) {
        if (bytes[i] != '\n') {
            continue;
        }

        /* Terminate the current line in place. */
        bytes[i] = '\0';
        counter++;

        if (counter >= size) {
            size += DEFAULT_LINE_ARRAY_DIM;
            /* Keep the old array reachable if realloc fails. */
            tmp = realloc(data, size * sizeof *tmp);
            if (tmp == NULL) {
                fprintf(stderr, "Couldn't allocate enough memory!\n");
                goto done;
            }
            data = tmp;
        }

        /* The next line starts right after the terminator just written. */
        data[counter] = &bytes[i + 1];
        length = (long)(data[counter] - data[counter - 1] - 1);
        printf("%zu\n", counter);
        printf("%ld\n", length);
    }

    for (k = 0; k < counter; k++) {
        printf("%s\n", data[k]);
    }
    rc = 0;

done:
    free(data);
    free(bytes);
    fclose(f);
    return rc;
}

Resources