I have a huge number of txt files, each containing a 64x64 matrix of integers. The txt files have names like:
mat_1.txt, mat_2.txt, mat_3.txt, mat_4.txt, .... , mat_n.txt.
I have to create a variable, allocate space on host and device, read txt file and copy to device. is it possible to do it all in one loop?
I know how to create a string with sprintf but do not know how to use this string for example for declaring variables.
/* Question sketch: each iteration formats the text "mat_<k>" into a buffer and
 * then tries to use that text as the name of a new variable. */
char fname[10];
for( int k=1; k<=n; k++ )
{
sprintf( fname, "mat_%d", k );
int *fname; // how to tell the compiler that instead of `fname` there
// should be `mat_1` and in the next cycle `mat_2`?
}
You can't create a variable name at runtime. Variable names are for compiler and only compiler to know and cannot be generated on the fly.
What you need is an array. Since the data already need to be stored in an array, you need to add 1 dimension to your array.
For example, if data in mat_1.txt is a 1 dimensional array, you can have:
/*
 * Sketch: load every mat_<k>.txt (k = 1..n) into its own heap array.
 * After the loop, mat[k-1] holds the data of mat_<k>.txt, or NULL when that
 * file could not be opened. "number of items" is pseudo-code for the element
 * count read from the file.
 */
int **mat; // one pointer per file
int k;
mat = malloc(n * sizeof(*mat)); // get an array of pointers (add error checking)
for (k = 1; k <= n; ++k)
{
    char fname[20]; // "mat_" + up to 11 characters of int + ".txt" + '\0'
    FILE *fin;
    snprintf(fname, sizeof fname, "mat_%d.txt", k);
    fin = fopen(fname, "r");
    if (fin == NULL)
    {
        /* Handle failure */
        mat[k-1] = NULL; // keeps the free() loop below well defined
        continue;        // do not read from or close a stream that never opened
    }
    /* read number of items */
    mat[k-1] = malloc(number of items * sizeof(*mat[k-1])); // allocate an array for each file
    /* read data of file and put in mat[k-1] */
    fclose(fin);
}
/* At this point, data of file mat_i.txt is in array mat[i-1] */
/* When you are done, you need to free the allocated memory: */
for (k = 0; k < n; ++k)
    free(mat[k]); // free(NULL) is a no-op, so skipped files are fine
free(mat);
What computer are you using?
A 64x64 array of int, where int is 4 bytes, is an array of 16,384 bytes; 22,500 files with 1 matrix per file would be 368,640,000 bytes.
That works fine on my 5 year old laptop:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_FILES (22500)
#define MAX_J (64)
#define MAX_K (64)
/* Static storage for every matrix: a[f][j][k] is element (j,k) of file f+1. */
int a[MAX_FILES][MAX_J][MAX_K];
/* The input files are named mat_1.txt, mat_2.txt, ... */
const char namefmt[] = "mat_%d.txt";
/*
 * Reads mat_1.txt .. mat_<MAX_FILES>.txt, each holding MAX_J x MAX_K
 * whitespace-separated integers, into the global array `a`.
 * Exits with status 1 on the first file that cannot be opened or parsed.
 */
int main (int argc, const char * argv[]) {
    (void)argc;
    (void)argv;
    char fname[sizeof namefmt + 11]; // an int prints as at most 11 characters (sign included)
    for (int fileNumber=1; fileNumber<=MAX_FILES; ++fileNumber) {
        snprintf(fname, sizeof fname, namefmt, fileNumber);
        FILE* fp = fopen(fname, "r");
        if (fp == NULL) {
            fprintf(stderr, "Error, can't open file %s\n", fname);
            exit(1);
        }
        for (int j=0; j<MAX_J; ++j) {
            for (int k=0; k<MAX_K; ++k) {
                // files are numbered from 1, array slots from 0
                if (fscanf(fp, "%d", &a[fileNumber-1][j][k]) != 1) {
                    fprintf(stderr, "Error, bad or missing value in file %s\n", fname);
                    fclose(fp);
                    exit(1);
                }
            }
        }
        fclose(fp);
    }
    return 0;
}
It should work okay (though it may become painfully slow) on a modern computer running an operating system with virtual memory and enough swap space. Declaring it as an array, rather than using malloc, will save a minuscule amount of space, but is otherwise the same.
Related
In a school assignment, I have to sort elements of struct that are located in binary file. I think I have managed to sort it, but I have a problem with printing the result. I don't know how to access elements of struct because data must be read from file, so I only have address of the first struct in a array. (I think it should stay in array so that I can use qsort.)
This is the main code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "readfile.h"
/* One fixed-size entry of the data file: a name buffer and a value. */
typedef struct {
    char name[32];
    double value;
} record;

/*
 * Ordering used for qsort: by name first, then by value.
 *
 * Names are compared as text, limited to the size of the name buffer so a
 * name that fills all 32 bytes without a terminator is still handled.
 * Returns a negative number, zero, or a positive number when *a sorts
 * before, equal to, or after *b.
 */
int nuvu(record* a, record* b){
    /* a->name < b->name would compare addresses, not contents */
    int byName = strncmp(a->name, b->name, sizeof a->name);
    if(byName != 0) return byName;
    if(a->value < b->value) return -1;
    if(a->value > b->value) return 1;
    return 0; /* equal records must yield 0; falling off the end is undefined */
}
int main()
{
long N;
unsigned char* p = readfile( "d.bin", &N );
char* s;
scanf("%s",&s);
int k= N/sizeof(record);
qsort(p,k,sizeof(record),(int(*)(const void*, const void *))nuvu);
printf???
free(p);
return 0;
}
Additional:
readfile.c
#include "readfile.h"
/*
 * Reads the whole file `filename` into a newly allocated buffer.
 *
 * On success returns the buffer (the caller must free() it) and stores the
 * file size in bytes in *pN. On failure returns a null pointer and stores a
 * negative code in *pN:
 *   -1  the file could not be opened
 *   -2  the buffer could not be allocated
 *   -3  the size could not be determined or the read came up short
 */
unsigned char* readfile( char* filename, long* pN )
{
    FILE* f= fopen(filename,"rb");
    if(f==0){
        *pN=-1;
        return 0;
    }
    /* ftell reports -1 on failure; never hand that to malloc/fread */
    long size=-1;
    if(fseek(f,0,SEEK_END)==0)
        size=ftell(f);
    if(size<0 || fseek(f,0,SEEK_SET)!=0){
        *pN=-3;
        fclose(f);
        return 0;
    }
    /* malloc(0) may return NULL; ask for at least one byte so an empty
     * file is not reported as an allocation failure */
    unsigned char* p=malloc(size>0 ? (size_t)size : 1);
    if(p==0){
        *pN=-2;
        fclose(f);
        return 0;
    }
    size_t r = fread(p,1,(size_t)size,f);
    fclose(f);
    if(r!=(size_t)size){
        *pN=-3;
        free(p);
        return 0;
    }
    *pN=size;
    return p;
}
readfile.h
/* Identifiers containing a double underscore are reserved for the
 * implementation, so the guard uses a plain name. */
#ifndef READFILE_H
#define READFILE_H
#include <stdio.h>
#include <stdlib.h>
/*
 * Reads the whole of `filename` into a malloc'ed buffer and stores its size
 * in *pN. Returns a null pointer and a negative code in *pN on failure.
 * The caller frees the returned buffer.
 */
unsigned char* readfile(char* filename, long* pN);
#endif /* READFILE_H */
The biggest confusion you seem to have is "How do I read my array of structs back in?"
unsigned char* p = readfile( "d.bin", &N );
Is no way to begin. The concept of reading records from a binary file into an array of struct, is to read sizeof (struct record) bytes from the file into storage for type struct record. (this will ignore for now the lack of serialization of the data, padding and portability issues, and the fact we are using a typedef).
Knowing the file size, and knowing the sizeof (struct record), allows you to (1) determine the number of records you will read from the file, e.g. (nbytes / sizeof (struct record)), and (2) determine whether there are any stray bytes left over that will not be part of the read (e.g. if (nbytes % sizeof (struct record) != 0)), which, if present, you should at least warn about.
Depending on the number of records you have to read and whether there is an upper bound for that number will determine whether you can use a fixed size array (or VLA) or whether you need to dynamically allocate (and reallocate) to address an unknown number of records or to prevent StackOverflow.. Regardless how you handle creating storage for your records -- It is up to you to make sure you do not write beyond the bounds of the storage you create.
Below, for example purposes we will simply work with an array of 100 records. The dividing line between what will fit on the stack and when you need to dynamically allocate will be compiler dependent, but any time you start thinking about tens of thousands of records, you need to consult your compiler documentation and start thinking about dynamic allocation.
fread provides a simple method to read binary records from a file and validate that you actually read the number of records you intended to read. For example, given a declaration of an array of 100 records in rec, you could do something like the following:
/* Excerpt: size the read from the file length, then read all records in one call. */
enum { MAXC = 32, MAXS = 100 }; /* if you need constants, define them */
...
record rec[MAXS] = {{ .name = "" }}; /* array of 100 records */
...
nrec = nbytes / sizeof *rec; /* number of records based on file size */
/* NOTE(review): nothing in this excerpt limits nrec to MAXS, the capacity of rec */
/* read / validate nrec records from file */
if (fread (rec, sizeof *rec, nrec, fp) != nrec) { /* fread returns the count of complete items read */
perror (fn);
return 1;
}
With your records successfully read from your file, using qsort to sort the records (either by name or by value) requires you to understand that the const void * pointers to be compared in your comparison function will be pointer to rec, so you must provide an appropriate cast within your comparison function to access and compare the values. For example to perform a string comparison on name, you can do something similar to the following:
/** qsort callback: orders two records by their name strings */
int reccmpname (const void *a, const void *b)
{
    const char *lhs = ((const record *)a)->name;
    const char *rhs = ((const record *)b)->name;

    return strcmp (lhs, rhs);
}
Other than that, the remainder of what your code lacks is validation of each step in the process. Always, always validate the return of any function you use and handle any errors you encounter. A minimal example, without splitting the code up between separate source files, could be something like the following. Splitting it up into separate source files is left to you.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* MAXC: bytes in a record name, MAXS: capacity of the record array */
enum { MAXC = 32, MAXS = 100 };

/* One entry as stored in the data file. */
typedef struct {
    char name[MAXC];
    double value;
} record;

/** qsort callback ordering records by name, using strcmp order */
int reccmpname (const void *a, const void *b)
{
    return strcmp (((const record *)a)->name, ((const record *)b)->name);
}
int main (int argc, char **argv) {
record rec[MAXS] = {{ .name = "" }}; /* array of 100 records */
size_t nrec = 0; /* number of records from file */
long nbytes = 0; /* number of bytes in file */
char *fn = argc > 1 ? argv[1] : "dat/records.bin";
FILE *fp = fopen (fn, "rb");
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if (fseek (fp, 0, SEEK_END) == -1) { /* validate seek to end */
perror ("fseek");
return 1;
}
nbytes = ftell (fp); /* number of bytes in file */
if (nbytes == -1) {
perror ("ftell");
return 1;
}
if (fseek (fp, 0, SEEK_SET) == -1) { /* validate seek to start */
perror ("fseek");
return 1;
}
if (nbytes % sizeof *rec != 0) /* does file contain even no. or records? */
fprintf (stderr, "warning: file size not multiple of record size.\n");
nrec = nbytes / sizeof *rec; /* number of records based on file size */
/* read / validate nrec records from file */
if (fread (rec, sizeof *rec, nrec, fp) != nrec) {
perror (fn);
return 1;
}
fclose (fp); /* close file */
printf ("\n=== unsorted records ===\n\n"); /* output unsorted */
for (size_t i = 0; i < nrec; i++)
printf ("%-32s %g\n", rec[i].name, rec[i].value);
qsort (rec, nrec, sizeof *rec, reccmpname); /* qsort records */
printf ("\n=== sorted records ===\n\n"); /* output sorted */
for (size_t i = 0; i < nrec; i++)
printf ("%-32s %g\n", rec[i].name, rec[i].value);
return 0;
}
note: The data file used simply contained 100 struct records with dictionary words as name and random values as value shuffled before being written to the file.
Example Use/Output
$ ./bin/struct_rd_name_val_recs
=== unsorted records ===
Abscess 4.15871e+08
Abject 3.5743e+08
Abo 6.87659e+08
Aboard 2.02028e+09
Abase 3.34319e+08
...
=== sorted records ===
A 3.66907e+08
Aaa 5.59224e+07
Aaas 1.45617e+09
Aardvark 1.72828e+09
Aarhus 1.95723e+09
Let me know if you have any questions.
Use the standard procedure for qsort, don't change its signature. Use strcmp as noted in comments. You have to figure out the logic of how the structure is to be sorted. The example below sorts by record::name, if name is the same, it tests value, in that order:
/*
 * qsort comparison for record: by name (strcmp order), then by value.
 * Returns negative, zero or positive when *a_ sorts before, equal to or
 * after *b_.
 */
int nuvu(const void * a_, const void * b_)
{
    const record* a = a_;
    const record* b = b_;
    int byName = strcmp(a->name, b->name);
    if(byName != 0)
        return byName;
    /* yields -1, 0 or 1; a bare `a->value > b->value` can never say "less",
     * which makes the ordering inconsistent */
    return (a->value > b->value) - (a->value < b->value);
}
Data is read as bytes in to p, it must be converted to "array of records" record* arr = (record*)p;. The number of items in the array should be filesize/sizeof(record) if everything went according to plan
/*
 * Reads d.bin, views the bytes as an array of record, sorts it with nuvu and
 * prints each record. Returns 1 when the file cannot be read.
 */
int main(void)
{
    long filesize = 0;
    unsigned char* p = readfile("d.bin", &filesize);
    if(!p)
    {
        /* a failed read is an error, not a successful run */
        fprintf(stderr, "could not read d.bin\n");
        return 1;
    }
    size_t count = (size_t)filesize / sizeof(record);
    record* arr = (record*)p;
    qsort(arr, count, sizeof(record), nuvu);
    for(size_t i = 0; i < count; i++)
        printf("%s %f\n", arr[i].name, arr[i].value);
    free(p);
    return 0;
}
You can use the [] operator with a pointer:
struct my_struct {
int i, j;
};
struct my_struct * ptr = malloc(sizeof(struct my_struct) * 10);
for(int n = 0; 10 > n; ++ n)
{
ptr[n].i = n;
ptr[n].j = n*2;
}
free(ptr);
There are a lot of questions about converting a 2D array into a 1D array, but I am attempting just the opposite. I'm trying to partition a string into substrings of constant length and house them in a 2D array. Each row of this 2D matrix should contain a substring of the initial string, and, if each row were to be read in succession and concatenated, the initial string should be reproduced.
I nearly have it working, but for some reason I am losing the first substring (partitions[0] -- length 8*blockSize) of the initial string (bin):
/*
 * Splits bin into consecutive substrings of 8*blockSize characters, stores
 * each as its own string in partitions[], and prints them twice: once while
 * building and once afterwards. Characters left over after the last full
 * block are ignored.
 */
int main (void){
    const char* bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
    int blockSize = 2; // block size in bytes
    int blockChars = 8*blockSize; // characters per block
    int numBlocks = (int)(strlen(bin)/(size_t)blockChars); // number of blocks to analyze
    // the table holds pointers, so each slot is sizeof(char*), not sizeof(char)
    char** partitions = malloc(((size_t)numBlocks+1)*sizeof *partitions);
    if(partitions == NULL)
        return 1;
    partitions[numBlocks] = NULL; // the extra slot marks the end of the table
    for(int i = 0; i<numBlocks;++i){
        partitions[i] = malloc((size_t)blockChars+1);
        if(partitions[i] == NULL)
            return 1;
        memcpy(partitions[i],&bin[i*blockChars],(size_t)blockChars);
        partitions[i][blockChars] = '\0';
        printf("Printing partitions[%d]: %s\n", i, partitions[i]);
    }
    for(int j=0; j<numBlocks;++j)
        printf("Printing partitions[%d]: %s\n", j,partitions[j]);
    for(int j=0; j<numBlocks;++j)
        free(partitions[j]);
    free(partitions);
    return 0;
}
The output is as follows:
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
Printing partitions[0]: Hj
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
The construction of partitions in the first for loop is successful. After construction at read out, the string at partitions[0] contains garbage values. Can anyone offer some insight?
/* Quoted from the question: copies numBlocks substrings of 8*blockSize characters out of bin. */
int numBlocks = strlen(bin)/(8*blockSize); // number of blocks to analyze
/* NOTE(review): the request below is counted in sizeof(char) units, while partitions[i] stores char* values */
char** partitions = (char**)malloc((numBlocks+1)*sizeof(char)); // break text into blocks
for(int i = 0; i<numBlocks;++i){
partitions[i] = (char*)malloc((8*blockSize+1)*sizeof(char)); // 8*blockSize characters plus the terminator
memcpy(partitions[i],&bin[8*i*blockSize],8*blockSize);
partitions[i][8*blockSize] = '\0';
printf("Printing partitions[%d]: %s\n", i, partitions[i]);
}
This all looks suspicious to me; it's far too complex for the task, making it a prime suspect for errors.
For reasons explained in answers to this question, void * pointers which are returned by malloc and other functions shouldn't be casted.
There's no need to multiply by 1 (sizeof (char) is always 1 in C). In fact, in your first call to malloc you should be multiplying by sizeof (char *) (or better yet, sizeof *partitions, as in the example below), since that's the size of the type of element that partitions points at.
malloc might return NULL, resulting in undefined behaviour when you attempt to assign into the location it points at.
Anything else (i.e. everything that isn't NULL) that malloc, calloc or realloc returns will need to be freed when no longer in use, or else tools such as valgrind (a leak detection program, useful for people who habitually forget to free allocated objects and thus cause memory leaks) will report false positives and lose part of their usefulness.
numBlocks, i, or anything else that's for counting elements of an array, should be declared as a size_t to follow standard convention (e.g. check the strlen manual, synopsis section to see how strlen is declared, noting the type of the return value is size_t). Negative values caused by overflows here will obviously cause the program to misbehave.
I gather you've yet to think about any excess beyond the last group of 8 characters... This shouldn't be difficult to incorporate.
I suggest using a single allocation, such as:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BLOCK_SIZE 8
/*
 * Cuts bin into rows of at most BLOCK_SIZE characters held in one contiguous
 * allocation, printing every row while filling and once more afterwards.
 */
int main(void) {
    const char *bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
    const size_t total = strlen(bin);

    /* whole rows, plus one more for any remainder */
    size_t rows_needed = total / BLOCK_SIZE;
    if (total % BLOCK_SIZE != 0) {
        rows_needed++;
    }

    char (*rows)[BLOCK_SIZE + 1] = malloc(rows_needed * sizeof *rows);
    if (rows == NULL) {
        exit(EXIT_FAILURE);
    }

    size_t idx = 0;
    while (idx < rows_needed) {
        const char *src = bin + idx * BLOCK_SIZE;
        /* snprintf truncates to the row size and always terminates */
        snprintf(rows[idx], sizeof rows[idx], "%s", src);
        printf("Printing partitions[%zu]: %s\n", idx, rows[idx]);
        idx++;
    }

    for (idx = 0; idx != rows_needed; ++idx) {
        printf("Printing partitions[%zu]: %s\n", idx, rows[idx]);
    }

    free(rows);
    return 0;
}
There are a few problems with your code.
You are allocating **partitions incorrectly.
Instead of:
char** partitions = (char**)malloc((numBlocks+1)*sizeof(char)); /* dont need +1, as numblocks is enough space. */
You need to allocate space for char* pointers, not char characters.
instead, this needs to be:
char** partitions = malloc((numBlocks+1)*sizeof(char*));
Also read Why not to cast result of malloc(), as it is not needed in C.
malloc() needs to be checked everytime, as it can return NULL when unsuccessful.
Once finished with the space allocated, it is always good to free() memory previously requested by malloc(). It is important to do this at some point in the program.
Here is some code which shows this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BLOCKSIZE 2
#define BLOCK_MULTIPLIER 8
/*
 * Splits bin into strings of BLOCKSIZE * BLOCK_MULTIPLIER characters, one heap
 * allocation per string, prints them while building, then prints and releases
 * them in a second pass.
 */
int main(void) {
    const char *bin = "00011101010000100001111101001101000010110000111100000010000111110100111100010011010011100011110000011010";
    const size_t blocksize = BLOCKSIZE;
    const size_t multiplier = BLOCK_MULTIPLIER;
    const size_t numbytes = multiplier * blocksize;    /* characters per block */
    const size_t numblocks = strlen(bin) / numbytes;   /* whole blocks only */

    char **partitions = malloc(numblocks * sizeof(char *));
    if (!partitions) {
        printf("Cannot allocate %zu spaces\n", numblocks);
        exit(EXIT_FAILURE);
    }

    size_t made = 0;
    while (made < numblocks) {
        char *chunk = malloc(numbytes + 1);
        partitions[made] = chunk;
        if (!chunk) {
            printf("Cannot allocate %zu bytes for pointer\n", numbytes+1);
            exit(EXIT_FAILURE);
        }
        memcpy(chunk, bin + numbytes * made, numbytes);
        chunk[numbytes] = '\0';
        printf("Printing partitions[%zu]: %s\n", made, chunk);
        made++;
    }

    printf("\n");

    for (size_t shown = 0; shown != numblocks; ++shown) {
        printf("Printing partitions[%zu]: %s\n", shown, partitions[shown]);
        free(partitions[shown]);
        partitions[shown] = NULL;
    }

    free(partitions);
    partitions = NULL;
    return 0;
}
Which outputs non-garbage values:
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
Printing partitions[0]: 0001110101000010
Printing partitions[1]: 0001111101001101
Printing partitions[2]: 0000101100001111
Printing partitions[3]: 0000001000011111
Printing partitions[4]: 0100111100010011
Printing partitions[5]: 0100111000111100
I've got a function which, as is, works correctly. However the rest of the program has a limitation in that I've preset the size of the array (the space to be allocated). Obviously, this is problematic should an event arise in which I need extra space for that array. So I want to add dynamic allocation of memory into my program.
But I'm having an issue with the whole pointer to a pointer concept, and I've utterly failed to find an online explanation that makes sense to me...
I think I'll want to use malloc(iRead + 1) to get an array of the right size, but I'm not sure what that should be assigned to... *array? **array? I'm not at all sure.
And I'm also not clear on my while loops. &array[iRead] will no longer work, and I'm not sure how to get a hold of the elements in the array when there's a pointer to a pointer involved.
Can anyone point (heh pointer pun) me in the right direction?
I can think of the following approaches.
First approach
Make two passes through the file.
In the first pass, read the numbers and discard them but keep counting the number of items.
Allocate memory once for all the items.
Rewind the file and make a second pass through it. In the second pass, read and store the numbers.
/*
 * Counts the numbers that can be read from fp, starting at its current
 * position. With hexi == 0 tokens are parsed with "%X", otherwise with "%d".
 * Counting stops at end of file or at the first token that does not parse;
 * the stream is left wherever scanning stopped.
 */
int getNumberOfItems(FILE* fp, int hexi)
{
    int numItems = 0;
    if (hexi == 0) {
        unsigned int hexNumber; /* %X stores through an unsigned int pointer */
        while (fscanf(fp, "%X", &hexNumber) == 1) {
            ++numItems;
        }
    } else {
        int number;
        while (fscanf(fp, "%d", &number) == 1) {
            ++numItems;
        }
    }
    return numItems;
}
/*
 * Reads up to numItems numbers from fp into array. With hexi == 0 tokens are
 * parsed with "%X", otherwise with "%d".
 * If the input runs out or a token does not parse, the remaining elements
 * are set to 0 so the caller never sees indeterminate values.
 * NOTE(review): the name matches POSIX read(); confirm it is never linked
 * together with code that includes <unistd.h>.
 */
void read(int *array, FILE* fp, int numItems, int hexi)
{
    int i = 0;
    for ( ; i < numItems; ++i ) {
        if (hexi == 0) {
            unsigned int hexNumber; /* %X stores through an unsigned int pointer */
            if (fscanf(fp, "%X", &hexNumber) != 1)
                break;
            array[i] = (int)hexNumber;
        } else if (fscanf(fp, "%d", &array[i]) != 1) {
            break;
        }
    }
    for ( ; i < numItems; ++i )
        array[i] = 0;
}
/*
 * Two-pass driver: counts the numbers in the file named by argv[1], allocates
 * an array of that size, rewinds and reads the numbers into it.
 */
int main(int argc, char** argv)
{
    int hexi = 0;

    if ( argc < 2 ) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    FILE* fp = fopen(argv[1], "r");
    if ( fp == NULL ) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    // Get the number of items in the file.
    int numItems = getNumberOfItems(fp, hexi);

    // Allocate memory for the items (at least one element, so an empty
    // file is not mistaken for an allocation failure).
    int* array = malloc(sizeof *array * (numItems > 0 ? (size_t)numItems : 1));
    if ( array == NULL ) {
        fprintf(stderr, "out of memory\n");
        fclose(fp);
        return EXIT_FAILURE;
    }

    // Rewind the file before reading the data; the standard function is
    // rewind(), there is no frewind().
    rewind(fp);

    // Read the data.
    read(array, fp, numItems, hexi);
    fclose(fp);

    // Use the data
    // ...
    // ...

    // Deallocate memory
    free(array);
    return 0;
}
Second approach.
Keep reading numbers from the file.
Every time you read a number, use realloc to allocate space for the additional item.
Store the number in the reallocated memory.
/*
 * Reads every number in the file fpin into a heap array that grows by one
 * element per number. With hexi == 0 tokens are parsed with "%X", otherwise
 * with "%d"; reading stops at end of file or at the first bad token.
 *
 * Stores the array in *array (NULL when no number was read; the caller frees
 * it) and returns the element count. Exits with -1 if the file cannot be
 * opened and with -2 if memory runs out.
 */
int read(int **array, char* fpin, int hexi)
{
    int iRead = 0;

    // Local variable for ease of use.
    int* arr = NULL;

    FILE *fp = fopen(fpin, "r");
    if (NULL == fp){
        printf("File open error!\n");
        exit(-1);
    }

    for (;;) {
        int number;
        if (hexi == 0) {
            unsigned int hexNumber; // %X stores through an unsigned int pointer
            if (fscanf(fp, "%X", &hexNumber) != 1)
                break;
            number = (int)hexNumber;
        } else if (fscanf(fp, "%d", &number) != 1) {
            break;
        }

        // Grow through a temporary: assigning realloc's result straight to
        // arr would lose the old block when realloc fails.
        int* grown = realloc(arr, sizeof *grown * ((size_t)iRead + 1));
        if (grown == NULL) {
            free(arr);
            fclose(fp);
            printf("realloc error!\n");
            exit(-2);
        }
        arr = grown;
        arr[iRead] = number;
        iRead += 1;
    }

    fclose(fp);

    // Return the array in the output argument.
    *array = arr;
    return iRead;
}
/*
 * Driver for the realloc-based reader: loads all numbers from the file named
 * by argv[1] and releases them again.
 */
int main(int argc, char** argv)
{
    int hexi = 0;
    int* array = NULL;

    // argv[1] is only valid when an argument was actually given
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    // Read the data.
    int numItems = read(&array, argv[1], hexi);
    (void)numItems; // the element count, for the code that uses the data

    // Use the data
    // ...
    // ...

    // Deallocate memory
    free(array);
    return 0;
}
/*
 * Two-pass reader: counts the numbers in the file fpin, allocates an array of
 * exactly that size, rewinds and reads the numbers into it. With hexi == 0
 * tokens are parsed with "%X", otherwise with "%d"; counting stops at end of
 * file or at the first token that does not parse.
 *
 * Stores the array in *array (the caller frees it) and returns the number of
 * elements stored. Exits with -1 if the file cannot be opened and with -2 if
 * the allocation fails.
 */
int read(int **array, char* fpin, int hexi) {
    int iRead = 0;
    int i, *ary;
    FILE *fp;

    fp = fopen(fpin, "r");
    if (NULL == fp){
        printf("File open error!\n");
        exit(-1);
    }

    /* Pass 1: count. A suppressed conversion ("%*d") returns 0 both on a
     * match and on a mismatch, so a bad token would never end the loop;
     * converting into a scratch variable makes the two cases distinct. */
    if (hexi == 0) {
        unsigned int scratch;
        while (fscanf(fp, "%X", &scratch) == 1)
            ++iRead;
    } else {
        int scratch;
        while (fscanf(fp, "%d", &scratch) == 1)
            ++iRead;
    }

    /* at least one element: malloc(0) may return NULL without being an error */
    ary = *array = malloc((iRead > 0 ? (size_t)iRead : 1) * sizeof *ary);
    if(ary == NULL){
        printf("malloc error!\n");
        exit(-2);
    }

    /* Pass 2: read the values that pass 1 counted. */
    rewind(fp);
    for(i = 0; i < iRead; ++i) {
        if (hexi == 0) {
            unsigned int hexNumber; /* %X stores through an unsigned int pointer */
            if (fscanf(fp, "%X", &hexNumber) != 1)
                break;
            ary[i] = (int)hexNumber;
        } else if (fscanf(fp, "%d", &ary[i]) != 1) {
            break;
        }
    }
    fclose(fp);
    return i;
}
I'd suggest something like this:
/* Sketch: build the array through a local pointer, then hand it back through array_pp. */
int read(int **array_pp, char* fpin, int hexi) {
...
int *array = malloc (sizeof (int) * n); /* n: element count, determined in the elided part */
for (int i=0; i < n; i++)
fscanf(fp, "%X",&array[i]);
...
*array_pp = array; /* publish the buffer to the caller */
return n;
}
Notes:
1) You must use "**" if you want to return a pointer in a function argument
2) If you prefer, however, you can declare two pointer variables (array_pp and array) to simplify your code.
Strictly speaking, you wouldn't call it an array. An array has a fixed size chosen when it is declared (and a local one typically lives on the stack). What you need (as you already said) is dynamically allocated memory on the heap.
maybe that's why you didn't find much :)
here are some tutorials:
http://en.wikibooks.org/wiki/C_Programming/Arrays (and following pages)
http://www.eskimo.com/~scs/cclass/int/sx8.html
you got the function declaration correctly:
int read(int **array, char* fpin, int hexi)
What you need to do:
find out how much memory you need, eg. how many elements
allocate it with *array = malloc(numElements * sizeof(int)) (read "at the address pointed by array allocate memory for numElements ints")
now you can (*array)[idx] = some int (read "at the address pointed by array, take the element with index idx and assign some int to it")
call it with int* destination; int size = read(&destination, "asdf", hexi)
hope it helps..
I have an ASCII file in which the entries of a vector are stored. I do not know the length (number of rows) of the file, nor do I have an estimation about its size as it may strongly vary from a few lines to some tens of thousands. I need an efficient way to read the data stored in that file and load them to a float* variable. The code should be in C.
My question is how to allocate memory for the vector which I need to create given that I do not know its size beforehand? Can you please give an example?
Finally, what is in your opinion the most appropriate prototype for such a function? Should it be something like:
load_data(const char* filename, float* data, int* vector_size);
?
Update 1.: While doing some initial tests, I wrote the following code:
/*
 * Allocates nrows floats and sets each of them to 7.0f.
 * The address of the new buffer is stored only in the parameter `matrix`,
 * which is this function's own copy of the argument: it is neither returned
 * nor written through a pointer, and nothing here frees the buffer.
 */
void create_random_matrix(float* matrix, const int nrows) {
matrix = (float *) malloc(sizeof (float) * nrows); /* replaces the local copy of the argument */
short i; /* NOTE(review): narrower than nrows, which is an int */
for (i = 0; i < nrows; i++) {
matrix[i] = 7.0f;
}
}
which should return an array with all its elements equal to 7.0f. Instead, when I call it from my main.c:
float *a; /* not given a value in this snippet */
create_random_matrix(a, 10); /* arguments are passed by value: the callee receives a copy of a */
printf("%f",a[0]); /* reads through a as it was before the call */
it prints 0.0f. How is that possible?!
Update 2. Were it not for your help, the following (working) code wouldn't have been written:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#define LINE_SIZE 10
#define ALLOCATION_CHUNK 50
/*
 * Loads one float per line from the text file `filename`.
 *
 * On success stores a heap array in *vector (the caller frees it), the number
 * of lines read in *length, and returns 0. If memory runs out, *vector is
 * NULL, *length is 0 and -1 is returned. If the file cannot be opened the
 * program exits with FILE_NOT_FOUND.
 *
 * A line longer than LINE_SIZE-1 characters still yields a single entry, but
 * only its first LINE_SIZE-1 characters are converted.
 */
int load_vector_data(const char* filename, float** vector, int* length) {
    *vector = NULL;
    *length = 0;

    FILE* fr = fopen(filename, "r");
    if (fr == NULL) {
        exit(FILE_NOT_FOUND);
    }

    int allocated_rows = ALLOCATION_CHUNK;
    int rows = 0; /* same width as *length; a 16-bit counter wraps at 65536 lines */
    float* data = malloc(sizeof *data * allocated_rows);
    if (data == NULL) {
        fclose(fr);
        return -1;
    }

    char line[LINE_SIZE];
    while (fgets(line, LINE_SIZE, fr) != NULL) {
        if (rows >= allocated_rows) {
            allocated_rows += ALLOCATION_CHUNK;
            /* grow through a temporary so the old block survives a failure */
            float* grown = realloc(data, sizeof *grown * allocated_rows);
            if (grown == NULL) {
                free(data);
                fclose(fr);
                return -1;
            }
            data = grown;
        }
        /* strtod stops at the trailing newline, so no stripping is needed */
        data[rows] = (float) strtod(line, NULL);
        rows++;

        /* fgets returned only part of an over-long line: discard the rest
         * so it is not counted as additional entries */
        if (strchr(line, '\n') == NULL) {
            int c;
            while ((c = fgetc(fr)) != EOF && c != '\n') {
                /* skip */
            }
        }
    }
    fclose(fr);

    /* Trim to the exact size. Skipped for zero rows because realloc(p, 0)
     * is not portable; a failed shrink just keeps the larger block. */
    if (rows > 0) {
        float* exact = realloc(data, sizeof *exact * rows);
        if (exact != NULL) {
            data = exact;
        }
    }

    *vector = data;
    *length = rows;
    return 0;
}
/*
 * Replaces the first newline in the string str with a terminator.
 * At most `size` characters are examined and scanning stops at the end of
 * the string, so bytes after the terminator are never read or changed.
 */
void strip_newline(char *str, int size) {
    for (int i = 0; i < size && str[i] != '\0'; ++i) {
        if (str[i] == '\n') {
            str[i] = '\0';
            return;
        }
    }
}
I tried it with an 8000-lines file and seems to be working just fine! Please, feel free to comment.
fgets is your friend for reading the data from the file (if my assumption that each item of data is on its own line is correct). Read the lines one by one and use strtof on the text you read. Reading text and converting it to floats is inherently a slow process, so I reckon the above is perfectly good enough.
As for your second question there are a couple of ways to do it. You could pass a float** in and make the malloc inside the function. Though this has the disadvantage of you needing to free it outside the function which is not exactly obvious. The only other way, i can think of, would be to scan through the file and count the number of new-lines then pre-allocate the array length for that.
Its hard to say whether doing a malloc and a bunch of reallocs would be more efficient than the scan through to count the number of lines, it would probably be worth trying both methods (neither is particularly hard) and seeing which one is faster for you.
So i am trying to read a text file line by line and save each line into a char array.
From my printout in the loop I can tell it is counting the lines and the number of characters per line properly but I am having problems with strncpy. When I try to print the data array it only displays 2 strange characters. I have never worked with strncpy so I feel my issue may have something to do with null-termination.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads /home/tgarvin/yes into memory and copies every newline-terminated
 * line into its own string in data[]. Prints each line's index and length
 * while splitting, then prints all lines.
 * Text after the last newline (an unterminated final line) is not stored.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    FILE *f = fopen("/home/tgarvin/yes", "rb");
    if (f == NULL) {
        perror("fopen");
        return 1;
    }
    fseek(f, 0, SEEK_END);
    long pos = ftell(f);
    fseek(f, 0, SEEK_SET);
    if (pos < 0) {
        perror("ftell");
        fclose(f);
        return 1;
    }

    char *bytes = malloc(pos > 0 ? (size_t)pos : 1);
    if (bytes == NULL || fread(bytes, 1, (size_t)pos, f) != (size_t)pos) {
        fprintf(stderr, "could not read file\n");
        free(bytes);
        fclose(f);
        return 1;
    }
    fclose(f);

    /* First pass: count the lines so the pointer table can be sized.
     * (A zero-length array cannot hold any pointers.) */
    size_t lines = 0;
    for (long i = 0; i < pos; i++) {
        if (bytes[i] == '\n')
            lines++;
    }

    char **data = malloc((lines > 0 ? lines : 1) * sizeof *data);
    if (data == NULL) {
        free(bytes);
        return 1;
    }

    /* Second pass: copy each line into its own terminated string. */
    size_t counter = 0;
    long start = 0;
    for (long i = 0; i < pos; i++) {
        if (bytes[i] == '\n') {
            long length = i - start;
            data[counter] = malloc((size_t)length + 1);
            if (data[counter] == NULL) {
                fprintf(stderr, "out of memory\n");
                return 1;
            }
            memcpy(data[counter], bytes + start, (size_t)length);
            data[counter][length] = '\0'; /* a bounded copy does not add this itself */
            printf("%zu\n", counter);
            printf("%ld\n", length);
            start = i + 1;
            counter++;
        }
    }

    /* data is a table of strings, so print it one element at a time */
    for (size_t n = 0; n < counter; n++) {
        printf("%s\n", data[n]);
        free(data[n]);
    }
    free(data);
    free(bytes);
    return 0;
}
Your "data[]" array is declared as an array of pointers to characters of size 0. When you assign pointers to it there is no space for them. This could cause no end of trouble.
The simplest fix would be to make a pass over the array to determine the number of lines and then do something like "char **data = malloc(number_of_lines * sizeof(char *))". Then doing assignments of "data[counter]" will work.
You're right that strncpy() is a problem -- it won't '\0' terminate the string if it copies the maximum number of bytes. After the strncpy() add "data[counter][length ] = '\0';"
The printf() at the end is wrong. To print all the lines use "for (i = 0; i < counter; i++) printf("%s\n", data[i]);"
Several instances of bad juju, the most pertinent one being:
int counter = 0;
char* data[counter];
You've just declared data as a variable-length array with zero elements. Despite their name, VLAs are not truly variable; you cannot change the length of the array after allocating it. So when you execute the lines
data[counter]=(char*)malloc(sizeof(char)*(length)+1);
strncpy(data[counter], bytes+start, length);
data[counter] is referring to memory you don't own, so you're invoking undefined behavior.
Since you don't know how many lines you're reading from the file beforehand, you need to create a structure that can be extended dynamically. Here's an example:
/**
 * Creates the initial data array: storage for initialSize pointers to char.
 * Returns whatever malloc returns; the storage is not initialised.
 */
char **dataAlloc(size_t initialSize)
{
    return malloc(initialSize * sizeof(char *));
}
/**
 * Extend data array; each extension doubles the length
 * of the array (a length of 0 grows to 1). If the extension
 * succeeds, the function will return 1; if not, the function
 * returns 0, and the values of data and length are unchanged.
 */
int dataExtend(char ***data, size_t *length)
{
    /* refuse lengths whose doubled byte size would not fit in size_t */
    if (*length > (size_t)-1 / sizeof **data / 2)
        return 0;

    /* never request 0 bytes: realloc(p, 0) may release the old block */
    size_t newLength = (*length != 0) ? 2 * *length : 1;

    char **tmp = realloc(*data, sizeof *tmp * newLength);
    if (!tmp)
        return 0;

    *length = newLength;
    *data = tmp;
    return 1;
}
Then in your main program, you would declare data as
char **data;
with a separate variable to track the size:
size_t dataLength = SOME_INITIAL_SIZE_GREATER_THAN_0;
You would allocate the array as
data = dataAlloc(dataLength);
initially. Then in your loop, you would compare your counter against the current array size and extend the array when they compare equal, like so:
if (counter == dataLength)
{
if (!dataExtend(&data, &dataLength))
{
/* Could not extend data array; treat as a fatal error */
fprintf(stderr, "Could not extend data array; exiting\n");
exit(EXIT_FAILURE);
}
}
data[counter] = malloc(sizeof *data[counter] * length + 1);
if (data[counter])
{
strncpy(data[counter], bytes+start, length);
data[counter][length] = 0; // add the 0 terminator
}
else
{
/* malloc failed; treat as a fatal error */
fprintf(stderr, "Could not allocate memory for string; exiting\n");
exit(EXIT_FAILURE);
}
counter++;
You are trying to print data with a format specifier %s, while your data is a array of pointer s to char.
Now talking about copying a string with giving size:
As far as I like it, I would suggest you to use
strlcpy() instead of strncpy()
size_t strlcpy( char *dst, const char *src, size_t siz);
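as strncpy won't terminate the string with a NUL character ('\0') when the source is at least as long as the given size;
strlcpy() solves this issue.
Strings copied by strlcpy are always NUL-terminated (provided the size is non-zero). Note that strlcpy is a BSD extension, not part of standard C.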
Allocate proper memory to the variable data[counter]. In your case counter is set to 0. Hence it will give segmentation fault if you try to access data[1] etc.
Declaring a variable like data[counter] is a bad practice. Even if counter changes in the subsequent flow of the program it wont be useful to allocate memory to the array data.
Hence use a double char pointer as stated above.
You can use your existing loop to find the number of lines first.
The last printf is wrong. You will be printing just the first line with it.
Iterate over the loop once you fix the above issue.
Change
int counter = 0;
char* data[counter];
...
int len=strlen(data);
...
for(; i<pos; i++)
...
strncpy(data[counter], bytes+start, length);
...
to
int counter = 0;
#define MAX_DATA_LINES 1024
char* data[MAX_DATA_LINES]; //1
...
for(; i<pos && counter < MAX_DATA_LINES ; i++) //2
...
strncpy(data[counter], bytes+start, length);
...
//1: reserves valid storage for the pointers to the lines (data[0] to data[MAX_DATA_LINES-1]). Without this you may hit a 'segmentation fault'; if you do not, you are merely lucky.
//2: ensures that if the file has more than MAX_DATA_LINES lines you do not run into a 'segmentation fault', because there is no valid storage for line pointers at data[MAX_DATA_LINES] and beyond.
I think that this might be a quicker implementation as you won't have to copy the contents of all the strings from the bytes array to a secondary array. You will of course lose your '\n' characters though.
It also takes into account files that don't end with a new line character and as pos is defined as long the array index used for bytes[] and also the length should be long.
#include <stdio.h>
#include <stdlib.h>
#define DEFAULT_LINE_ARRAY_DIM 100
/*
 * Reads test.c into one buffer and splits it into lines in place: every
 * newline is overwritten with a terminator and data[] records where each
 * line starts. Prints each line's index and length while splitting, then
 * prints all lines.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    FILE *f = fopen("test.c", "rb");
    if (f == NULL) {
        perror("test.c");
        return 1;
    }
    long pos = -1;
    if (fseek(f, 0, SEEK_END) == 0)
        pos = ftell(f);
    if (pos < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("test.c");
        fclose(f);
        return 1;
    }

    char *bytes = malloc((size_t)pos + 1); /* include an extra byte in case the file isn't '\n' terminated */
    if (bytes == NULL) {
        fprintf(stderr,"Couldn't allocate enough memory!\n");
        fclose(f);
        return 1;
    }
    if (pos > 0 && fread(bytes, (size_t)pos, 1, f) != 1) {
        fprintf(stderr,"Couldn't read the file!\n");
        free(bytes);
        fclose(f);
        return 1;
    }
    fclose(f);

    /* pos > 0 guards the bytes[pos-1] access for an empty file */
    if (pos > 0 && bytes[pos-1]!='\n')
    {
        bytes[pos++] = '\n';
    }

    long i;
    long length = 0;
    size_t counter = 0;
    size_t size=DEFAULT_LINE_ARRAY_DIM;
    char** data=malloc(size*sizeof *data);
    if (data==NULL) {
        fprintf(stderr,"Couldn't allocate enough memory!\n");
        free(bytes);
        return 1;
    }
    data[0]=bytes;

    for(i=0; i<pos; i++)
    {
        if (bytes[i]=='\n') {
            bytes[i]='\0';
            counter++;
            if (counter>=size) {
                size+=DEFAULT_LINE_ARRAY_DIM;
                /* grow through a temporary so the old table is not lost on failure */
                char** grown=realloc(data,size*sizeof *grown);
                if (grown==NULL) {
                    fprintf(stderr,"Couldn't allocate enough memory!\n");
                    free(data);
                    free(bytes);
                    exit(1);
                }
                data=grown;
            }
            /* start of the next line; after the final newline this is only
             * a one-past-the-end marker and is never dereferenced */
            data[counter]=&bytes[i+1];
            length = data[counter] - data[counter - 1] - 1;
            printf("%zu\n", counter);
            printf("%ld\n", length);
        }
    }

    for (size_t n=0;n<counter;n++)
        printf("%s\n", data[n]);

    free(data);
    free(bytes);
    return 0;
}