Below is a part of a longer code where malloc'ing for a 2D array is done. Could anyone tell if this is correct? If I introduce static values, the code works fine. Else, seg faults...
enum { LEN = 1024*8 };
char **tab = NULL;
int cur_LEN = LEN;
int count_lineMax = 0;
tab = malloc(count_lineMax * sizeof(*tab));
memset(tab, 0, count_lineMax * sizeof(*tab));
if(tab == NULL && count_lineMax) {
printf("Mem_check\n");
exit(1);
}
for(k=0;k<count_lineMax;k++) {
tab[k] = malloc(cur_LEN*sizeof(*tab[k]));
memset(tab[k], 0, cur_LEN*sizeof(*tab[k]));
if(tab[k] == NULL) {
printf("Mem_check*\n");
exit(1);
}
}
for(l=0;l<count_lineMax;l++) {
free(tab[l]);
}
free(tab);
int count_lineMax = 0;
tab = malloc(count_lineMax * sizeof(*tab));
What is this? You are gonna malloc 0 bytes?
There are at least two ways to build the table while reading lines. One uses the property of realloc() that if its first argument is a null pointer, it will behave like malloc() and allocate the requested space (so the code can be self-starting, using realloc() alone). That code might look like:
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { LEN = 1024*8 };
static void error(const char *fmt, ...);
static char *xstrdup(const char *str);
int main(void)
{
char line[LEN];
char **tab = NULL;
int tabsize = 0;
int lineno = 0;
while (fgets(line, sizeof(line), stdin) != 0)
{
if (lineno >= tabsize)
{
size_t newsize = (tabsize + 2) * 2;
char **newtab = realloc(tab, newsize * sizeof(*newtab));
if (newtab == 0)
error("Failed to allocate %zu bytes of memory\n", newsize * sizeof(*newtab));
tab = newtab;
tabsize = newsize;
}
tab[lineno++] = xstrdup(line);
}
/* Process the lines */
for (int i = 0; i < lineno; i++)
printf("%d: %s", i+1, tab[i]);
/* Release the lines */
for (int i = 0; i < lineno; i++)
free(tab[i]);
free(tab);
return(0);
}
static void error(const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
exit(1);
}
static char *xstrdup(const char *str)
{
size_t len = strlen(str) + 1;
char *copy = malloc(len);
if (copy == 0)
error("Failed to allocate %zu bytes of memory\n", len);
memmove(copy, str, len);
return(copy);
}
The alternative uses malloc() explicitly when the table is empty, and is most simply coded as:
int main(void)
{
char line[LEN];
int tabsize = 4;
int lineno = 0;
char **tab = malloc(tabsize * sizeof(*tab));
if (tab == 0)
error("Failed to allocate %zu bytes of memory\n", tabsize * sizeof(*tab));
...
Everything else can remain untouched.
Note that it can be convenient to have functions xmalloc() and xrealloc() which are guaranteed never to return a null pointer because they report an error instead:
static void *xmalloc(size_t nbytes)
{
void *space = malloc(nbytes);
if (space == 0)
error("Failed to allocate %zu bytes of memory\n", nbytes);
return(space);
}
static void *xrealloc(void *buffer, size_t nbytes)
{
void *space = realloc(buffer, nbytes);
if (space == 0)
error("Failed to reallocate %zu bytes of memory\n", nbytes);
return(space);
}
In the long term (big programs), this cuts down on the total number of times you write 'out of memory' error messages. On the other hand, if you must recover from memory allocation failures (save the user's work, etc), then this is not an appropriate strategy.
The code above creates a ragged array; different entries in tab have different lengths. If you want homogeneous lengths (as in the original code), then you have to replace or modify the xstrdup() function to allocate the maximum length.
You might have noticed that the code in xstrdup() used memmove() instead strcpy(). That's because strlen() already measured how long the string is, so there's no need for the copy code to test each byte to see whether it needs copying. I used memmove() because it can never go wrong, even if the strings overlap, even though in this context it is clear that the strings can never overlap and so memcpy() - which is not guaranteed to work correctly if the strings overlap - could have been used since the strings cannot overlap.
The strategy of allocating (oldsize + 2) * 2 new entries means that the memory reallocation code is exercised often enough during testing, without unduly impacting performance in production. See The Practice of Programming by Kernighan and Pike for a discussion of why this is a good idea.
I almost always use a set of functions similar to the error() function because it dramatically simplifies error reporting. The functions I normally use are part of a package that records and reports the program name (from argv[0]) as well, and that has a fairly wide range of alternative behaviours.
Related
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 13 days ago.
Improve this question
One of the most painful work to do in C is to figure out the best and most efficient way to work with strings. Been searching and trying things for almost a day now. Anyways, basically, I was trying to create a dynamic string builder function, so here's what I came up with:
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
void *alloc(void **ptr, size_t len) {
free(*ptr);
void *mem = malloc(len);
if (mem == NULL) {
fprintf(stderr, "err: alloc failed");
exit(EXIT_FAILURE);
}
return mem;
}
char *str(char **src, const char *fmt, ...) {
if (fmt == NULL) {
return NULL;
}
va_list args;
va_start(args, fmt);
size_t len = vsnprintf(NULL, 0, fmt, args);
if (len < 0) {
return NULL;
}
*src = alloc((void **) src, sizeof(char) * (len + 1));
vsnprintf(*src, len + 1, fmt, args);
va_end(args);
return *src;
}
And here's how I currently use it:
int main(int argc, char *argv[]) {
char *s = str(&s, "FOUND!");
puts(s);
puts(str(&s, "NOT %s", s));
puts(s);
return 0;
}
While it does work, I'm thinking about things like:
Possible memory leaks.
And anything I'm doing wrong with dynamic memory allocation.
As well as, how I initialize a char * with str(&pointer, "value") (if passing uninitiated &pointer is good or bad idea).
Is there anything wrong with the code and possible fix/improvements?
EDIT
Maybe I shouldn't have done this in the first place XD
Fixed according to answers/suggestions:
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
void *alloc(void *ptr, size_t len) {
void *mem = realloc(ptr, len);
if (mem == NULL) {
free(ptr);
perror(NULL);
}
return mem;
}
char *str(char *src, const char *fmt, ...) {
if (fmt == NULL) {
return NULL;
}
va_list args1, args2;
va_copy(args2, args1);
va_start(args1, fmt);
int len = vsnprintf(NULL, 0, fmt, args1);
va_end(args1);
if (len < 0) {
return NULL;
}
char *val = alloc(src, len + 1);
va_start(args2, fmt);
int res = vsnprintf(val, len + 1, fmt, args2);
va_end(args2);
free(src);
if (res < 0) {
free(val);
return NULL;
}
return val;
}
int main(int argc, char *argv[]) {
char *s = str(NULL, "FOUND!");
puts(s);
free(s);
s = str(s, "NOT %s", s);
puts(s);
free(s);
return 0;
}
What's changed (and notes (Ns) for future reference):
N: Passing uninitialized &pointer is wrong and is actually an error in disguise.
N: Use realloc instead of malloc which handles NULL initial value and deallocates previous allocated memory.
Changed ** parameters to just *. The initial idea behind ** is to be able to directly modify the *src pointer's value within the function (which somehow works in original code).
Initial declaration changed to char *s = str(NULL, "value"), and updating the value needs to be reassigned s = str(s, "updated: %s", s).
Changed size_t len to int for vsnprintf's return value. size_t is what I've initially used to match malloc's size_t parameter.
N: 2nd vsnprintf call might return different result after the first call, so copied the first va_list using va_copy for the 2nd call.
free old string before returning the actual value.
While it does work
No. It fails in some subtle ways.
Memory free'd too soon
str(&s, "NOT %s", s) performs the free() of s and then attempts to read s: alloc() (with its free()) occurs before 2nd vsnprintf(). This is a fundamental design problem.
Indeterminant value
Code fails with char *s = str(&s, "FOUND!"); as str() eventually attempts free(*ptr); and the value of *ptr is indeterminant.
Missing va_end()
Code should always call va_end() after a va_start().
if (len < 0) {
va_end(args); // add
return NULL;
}
Advanced: malloc(0)
alloc(), as used here, never calls malloc(0). Yet as a general purpose function alloc(..., 0) is possible. In that corner case, malloc(0), returning NULL is not a problem and code should not error out.
Advanced: 2nd vsnprintf() may fail
In multi-thread applications, the 2nd vsnprintf() may return a different value than the first. Code could check the 2nd return value too, but we are getting into some deep issues.
Wrong type
size_t len = vsnprintf(NULL, 0, fmt, args); if (len < 0) is never true. Check the return value of vsnprintf() by saving in a signed type.
Pedantic: vsnprintf() returns an int. In rare implementations, size_t has a narrower range than int. Should you care to cope with such:
//size_t len = vsnprintf(NULL, 0, fmt, args);
//if (len < 0) {
int len = vsnprintf(NULL, 0, fmt, args);
if (len < 0 || len >= SIZE_MAX) {
Improvements:
Compile with all warnings enabled. That will save time.
alloc() is strange to free(*str) and then not update *str.
[Edit] Code needs to free after printing.
OP wants to do str(&s, "NOT %s", s) which obliges s to remain stable throughout the print stage.
Code then must allocate for the new string, do the print and then free the old string.
`alloc()` appears to nearly act like `realloc()`.
Instead, return an error status and use strdup() (or a like function that handles NULL) for initialization.
Alternative untested code:
// Return error flag
bool alloc_alt(void **ptr, size_t len) {
if (len > 0) {
void *mem = realloc(*ptr, len);
if (mem == NULL) {
return true;
// or
fprintf(stderr, "err: alloc(%p, %zu) failed", (void*)ptr, len);
exit(EXIT_FAILURE);
}
*ptr == mem;
} else {
free(*ptr);
*ptr == NULL;
}
return false;
}
With str(), consider updating *src even when fmt == NULL or vsnprintf() fails - maybe free(*src); *src = NULL;?
Your functions alloc() and str() seem fine. You should maybe consider using realloc(), which could give you a small memory use/speed improvement.
You must ensure that you never pass an uninitialized pointer to str(), or your application will crash. Any new string pointer should be zero-initialized before calling the function. Why do you pass a pointer to pointer to the function? You are getting the resulting string back, after all, passing the initial pointer by value would help mitigate the problem, ex:
// expecting a pointer, not a pointer to pointer.
void* alloc(void* p, int len) {
void* result;
result = realloc(p, len);
if (!result) {
free(p);
exit(1);
}
return result;
}
char *str(char *src, const char *fmt, ...) {
char* result;
// figure out len
// ...
result = (char*)alloc(src, len + 1);
// fill the result string
// ...
return result;
}
// now str() can also be called as:
// ...
char* s;
s = str(NULL, "hello %s", "Mike");
// ...
free(s);
You are right to worry about memory leaks. Every single string you create using str() MUST be deallocated using free().
I am trying to replicate memcpy function, but when I try with NULL as both parameters but with size (5 for example) the original function gives the abort error but my program writes random characters.
void *ft_memcpy(void *dst, const void *src, size_t n)
{
size_t i;
char *d;
char *s;
s = (char*)src;
d = (char*)dst;
i = 0;
while (i < n)
{
d[i] = s[i];
i++;
}
i = 0;
return (dst);
}
int main()
{
char dst[0];
char src[0];
size_t n = 5;
printf("%s", ft_memcpy(dst, src, n));
printf("%s\n", memcpy(dst, src, n));
return (0);
}
src and dst have size 0, which is one way of specifying flexible arrays in C. You usually only define them inside structures that are going to be dynamically allocated, for example:
struct buffer {
size_t len;
char bytes[0]
};
#define NBYTES 8
struct buffer* ptr = malloc(sizeof(struct buffer) + NBYTES * sizeof(char));
const char* src = "hello!";
ptr->len = strlen(src);
memcpy(ptr->bytes, src, ptr->len);
Basically, indexing any of those arrays in your example will end up in a buffer overflow (you are accessing beyond the limits of the array).
The difference between this and passing NULL as parameters is that src and dst point to valid memory (main function stack). In C a buffer overflow has no defined behaviour (undefined behaviour), so the compiler is free to do what it wants. If you use a memory sanitizer (compile with -fsanitize=address) it will warn you about this problem and ask you to fix the error.
I recommend you using a debugger or add the following print statements in your copy function:
printf("%s: src: %p, dst: %p\n", __func__, src, dst);
See Array of zero length
Update: since you asked how to generate the abort error, the easiest and most convenient way for this scenario is using assertions.
#include <assert.h>
void function(void *addr) {
assert(addr != NULL);
}
will abort the execution if the condition addr != NULL evaluates to false.
Assertions are very useful to expose what conditions you assume will always be valid and for whom you don't want to pay the cost of checking them when you build the code for production, since these checks may have a performance impact. You can disable them by compiling with the flag -DNDEBUG.
See also: When should we use asserts in C?
Another way is making the program to simply abort:
#include <cstdlib.h>
void function(void *addr) {
if(addr == NULL) abort();
}
or to set errno variable to EINVAL:
#include <errno.h>
void function(void *addr) {
if (addr == NULL) {
errno = EINVAL;
return;
}
}
I have a function A with a body like so
char* A (const char* arg) {
char ret[8192];
B(ret);
return strdup(ret);
}
Function B looks like this (some pseudo code on iteration logic for brevity)
void B(char* ret) {
char retString[8192];
while(ITERATIONS_LEFT) {
snprintf(returnString, 8192, "\n Format %s\n\n", IT_VALUE);
snprintf(returnString, 8192, "\n Val %s\n\n", IT_VALUE_2);
}
strcpy(ret, returnString);
}
So essentially I have a function A that gives another function B a string buffer for B to enter formatted data into. Now this works fine as long as the total data returned from the iterations does not exceed 8196 (just a guess at a 'sufficiently large' value) but I think it would be better if I could do this dynamically and not have to worry about the case where my buffer fills. How would I achieve this in a fairly efficient manner, with the constraints that function A must still call function B, and that B's signature can be changed but A's cannot?
In addition to your allocation problem, you overwrite the same string here:
snprintf(returnString, 8192, "\n Format %s\n\n", IT_VALUE);
snprintf(returnString, 8192, "\n Val %s\n\n", IT_VALUE_2);
You could solve this with a kind of "appender" that re-allocates memory as it is needed by determining the required length by passing a size of 0 to snprintf as Joachim Pileborg suggested:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
struct append_t {
char *str; /* string */
size_t len; /* length of string */
size_t size; /* allocated size */
};
void append(struct append_t *app, const char *fmt, ...)
{
va_list arg;
size_t len;
va_start(arg, fmt);
len = vsnprintf(NULL, 0, fmt, arg);
va_end(arg);
while (app->len + len + 1 >= app->size) {
app->size = app->size ? app->size * 2 : 0x100;
app->str = realloc(app->str, app->size);
// Check and handle error
}
va_start(arg, fmt);
len = vsnprintf(app->str + app->len, app->size - app->len, fmt, arg);
va_end(arg);
app->len += len;
}
int main(int argc, char **argv)
{
struct append_t app = {NULL};
for (int i = 1; i < argc; i++) {
if (i > 1) append(&app, ", ");
append(&app, "'%s'", argv[i]);
}
if (app.str) puts(app.str);
free(app.str);
return 0;
}
Things to note:
The code uses the fact that realloc(NULL, size) behaves like malloc(size). The appender must be initialised to all zero.
vsnprintf is a variant of snprintf that takes a va_list instead of variadic arguments. The v...printf functions allow you to write your own printf-like functions. You can't pass variadic arguments to other functions, you have to create a va_list with the va_... macros from the <stdarg.h> header.
Most compilers can detect mismatches between printing formats and arguments for the standard printf functions. If you wat to benefit from these checks for your function, you could use the appropriate GCC attributes ((format(printf, 2, 3)) or the SAL annotation _Printf_format_string_.
In your example, A would create the appender and pass it to B and then return its .str. You could also return an appender from B and return its .strfrom A.
I suggest the following versions of A and B.
char* A (const char* arg) {
int size = 8192;
char *ret = malloc(size);
B(ret, size);
return ret;
}
void B(char* ret, int size) {
int pos = 0, required;
while(ITERATIONS_LEFT) {
required = snprintf(NULL, 0, "\n Format %s\n\n", IT_VALUE);
if (pos + required >= size) {
size *= 2;
ret = realloc(ret, size);
}
pos += sprintf(ret + pos, "\n Format %s\n\n", IT_VALUE);
required = snprintf(NULL, 0, "\n Val %s\n\n", IT_VALUE_2);
if (pos + required >= size) {
size *= 2;
ret = realloc(ret, size);
}
pos += sprintf(ret + pos, "\n Val %s\n\n", IT_VALUE_2);
}
}
Note that:
buffer size is doubled if it isn't enough. That works well in most cases.
copying is minimized (no strdup or strcpy)
you may want to make a new function with the repeated code in the while loop
In your version of B buffer is overwritten each time you call snprintf. Here the writing position (pos) is updated to append (null terminating char overwritten)
snprintf with NULL argument will return the required buffer size without printing anything anywhere
You may want to check that ret is not NULL after calling realloc
If ret is not NULL it's certain that the buffer is large enough. Thus simple sprintf is used to actually print.
Remember to free the buffer!
I haven't tested the code myself
I wrote a variadic C function which mission is to allocate the needed memory for a buffer, and then sprintf the args given to this function in that buffer. But I'm seeing a strange behavior with it. It works only once. If I have two calls for this function it segfaults.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
char *xsprintf(char * fmt, ...)
{
va_list ap;
char *part;
char *buf;
size_t len = strlen(fmt)+1;
va_start(ap, fmt);
while (part = va_arg(ap, char *))
len += strlen(part);
va_end(ap);
buf = (char*) malloc(sizeof(char)*len);
va_start(ap, fmt);
vsprintf(buf, fmt, ap);
va_end(ap);
return buf;
}
int main(int argc, const char *argv[])
{
char *b;
b = xsprintf("my favorite fruits are: %s, %s, and %s", "coffee", "C", "oranges");
printf("size de buf is %d\n", strlen(b)); //this works. After it, it segfaults.
/*
free(b);
b = NULL;
*/
b = xsprintf("my favorite fruits are: %s, %s, and %s", "coffee", "C", "oranges");
printf("size de buf is %d\n", strlen(b));
printf("%s", b);
return 0;
}
here's the output of this program:
size de buf is 46
[1] 4305 segmentation fault ./xsprintftest
Am I doing something wrong? Shouldn't I have used va_start multiple times in a single function? Do you have any alternatives? Thanks a lot! :)
You should be using vsnprintf. Use it twice. Once with a NULL destination/zero size to find out the length of the buffer you need to allocate, then a second time to fill the buffer. That way your function will work even if all the arguments are not strings.
As written, it will fail if there are any non-string arguments (%d, %x, %f, etc.). And counting the number of % characters is not a valid way to get the number of arguments. Your result could be too many (if there are literal % characters encoded as %%) or too few (if arguments are also needed for %*s, %.*d, etc. width/precision specifiers).
Pass NULL as the last arg to xsprintf():
b = xsprintf("my favorite fruits are: %s, %s, and %s",
"coffee", "C", "oranges", (void*)0);
Then your while() loop will see the NULL and terminate properly.
As R.. mentions in the comment below and in another answer, the xsprintf function will fail if there are other format arguments. You are better off using vsprintf as explained in the other answer.
My intent here was simply to demonstrate the use of a sentinel with va_arg.
First off, try using vsnprintf. It's just a good idea.
That's not your problem, though. Your problem is that you can't call va_arg more times than there are arguments. It doesn't return the number of arguments. You must either pass in a parameter telling it how many there are, or extract the number of special tokens in the format string to figure out how many there must implicitly be.
That's the reason why printf can choke your program if you pass it too few arguments; it will just keep pulling things off the stack.
The problem is that in the bit of code where you're accessing the va_arg() list without a particular defined end:
va_start(ap, fmt);
while (part = va_arg(ap, char *))
len += strlen(part);
va_end(ap);
The stdargs.h facilities don't have any built-in method for determining when the end of the va_list() occurs - you need to have that explicitly done by a convention you come up with. Either using a sentinel value (as in bstpierre's answer), or by having a count provided. A count can be an explicit parameter that's provided, or it can be implicit (such as by counting the number of format specifiers in the format string like the printf() family does).
Of course, you also have the issue that your code currently only supports the one kind of format-specifier (%s), but I assumed that that's intentional at this point.
Thanks a lot for your answers and ideas! So I rewrote my function like this:
void fatal(const char *msg)/*{{{*/
{
fprintf(stderr, "program: %s", msg);
abort ();
}/*}}}*/
void *xmalloc(size_t size)/*{{{*/
{
register void *value = malloc(size);
if (value == 0)
fatal ("Virtual memory exhausted");
return value;
}/*}}}*/
void *xrealloc(void *ptr, size_t size)/*{{{*/
{
register void *value = realloc(ptr, size);
if (value == 0)
fatal ("Virtual memory exhausted");
return value;
}/*}}}*/
char *xsprintf(const char *fmt, ...)/*{{{*/
{
/* Heavily inspired from http://perfec.to/vsprintf/pasprintf */
va_list args;
char *buf;
size_t bufsize;
char *newbuf;
size_t nextsize;
int outsize;
int FIRSTSIZE = 20;
bufsize = 0;
for (;;) {
if(bufsize == 0){
buf = (char*) xmalloc(FIRSTSIZE);
bufsize = FIRSTSIZE;
}
else{
newbuf = (char *)xrealloc(buf, nextsize);
buf = newbuf;
bufsize = nextsize;
}
va_start(args, fmt);
outsize = vsnprintf(buf, bufsize, fmt, args);
va_end(args);
if (outsize == -1) {
/* Clear indication that output was truncated, but no
* clear indication of how big buffer needs to be, so
* simply double existing buffer size for next time.
*/
nextsize = bufsize * 2;
} else if (outsize == bufsize) {
/* Output was truncated (since at least the \0 could
* not fit), but no indication of how big the buffer
* needs to be, so just double existing buffer size
* for next time.
*/
nextsize = bufsize * 2;
} else if (outsize > bufsize) {
/* Output was truncated, but we were told exactly how
* big the buffer needs to be next time. Add two chars
* to the returned size. One for the \0, and one to
* prevent ambiguity in the next case below.
*/
nextsize = outsize + 2;
} else if (outsize == bufsize - 1) {
/* This is ambiguous. May mean that the output string
* exactly fits, but on some systems the output string
* may have been trucated. We can't tell.
* Just double the buffer size for next time.
*/
nextsize = bufsize * 2;
} else {
/* Output was not truncated */
break;
}
}
return buf;
}/*}}}*/
And it's working like a charm! Thanks a million times :)
So i am trying to read a text file line by line and save each line into a char array.
From my printout in the loop I can tell it is counting the lines and the number of characters per line properly but I am having problems with strncpy. When I try to print the data array it only displays 2 strange characters. I have never worked with strncpy so I feel my issue may have something to do with null-termination.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char* argv[])
{
FILE *f = fopen("/home/tgarvin/yes", "rb");
fseek(f, 0, SEEK_END);
long pos = ftell(f);
fseek(f, 0, SEEK_SET);
char *bytes = malloc(pos); fread(bytes, pos, 1, f);
int i = 0;
int counter = 0;
char* data[counter];
int length;
int len=strlen(data);
int start = 0;
int end = 0;
for(; i<pos; i++)
{
if(*(bytes+i)=='\n'){
end = i;
length=end-start;
data[counter]=(char*)malloc(sizeof(char)*(length)+1);
strncpy(data[counter], bytes+start, length);
printf("%d\n", counter);
printf("%d\n", length);
start=end+1;
counter=counter+1;
}
}
printf("%s\n", data);
return 0;
}
Your "data[]" array is declared as an array of pointers to characters of size 0. When you assign pointers to it there is no space for them. This could cause no end of trouble.
The simplest fix would be to make a pass over the array to determine the number of lines and then do something like "char **data = malloc(number_of_lines * sizeof(char *))". Then doing assignments of "data[counter]" will work.
You're right that strncpy() is a problem -- it won't '\0' terminate the string if it copies the maximum number of bytes. After the strncpy() add "data[counter][length ] = '\0';"
The printf() at the end is wrong. To print all the lines use "for (i = 0; i < counter; i++) printf("%s\n", data[counter]);"
Several instances of bad juju, the most pertinent one being:
int counter = 0;
char* data[counter];
You've just declared data as a variable-length array with zero elements. Despite their name, VLAs are not truly variable; you cannot change the length of the array after allocating it. So when you execute the lines
data[counter]=(char*)malloc(sizeof(char)*(length)+1);
strncpy(data[counter], bytes+start, length);
data[counter] is referring to memory you don't own, so you're invoking undefined behavior.
Since you don't know how many lines you're reading from the file beforehand, you need to create a structure that can be extended dynamically. Here's an example:
/**
* Initial allocation of data array (array of pointer to char)
*/
char **dataAlloc(size_t initialSize)
{
char **data= malloc(sizeof *data * initialSize);
return data;
}
/**
* Extend data array; each extension doubles the length
* of the array. If the extension succeeds, the function
* will return 1; if not, the function returns 0, and the
* values of data and length are unchanged.
*/
int dataExtend(char ***data, size_t *length)
{
int r = 0;
char **tmp = realloc(*data, sizeof *tmp * 2 * *length);
if (tmp)
{
*length= 2 * *length;
*data = tmp;
r = 1;
}
return r;
}
Then in your main program, you would declare data as
char **data;
with a separate variable to track the size:
size_t dataLength = SOME_INITIAL_SIZE_GREATER_THAN_0;
You would allocate the array as
data = dataAlloc(dataLength);
initially. Then in your loop, you would compare your counter against the current array size and extend the array when they compare equal, like so:
if (counter == dataLength)
{
if (!dataExtend(&data, &dataLength))
{
/* Could not extend data array; treat as a fatal error */
fprintf(stderr, "Could not extend data array; exiting\n");
exit(EXIT_FAILURE);
}
}
data[counter] = malloc(sizeof *data[counter] * length + 1);
if (data[counter])
{
strncpy(data[counter], bytes+start, length);
data[counter][length] = 0; // add the 0 terminator
}
else
{
/* malloc failed; treat as a fatal error */
fprintf(stderr, "Could not allocate memory for string; exiting\n");
exit(EXIT_FAILURE);
}
counter++;
You are trying to print data with a format specifier %s, while your data is a array of pointer s to char.
Now talking about copying a string with giving size:
As far as I like it, I would suggest you to use
strlcpy() instead of strncpy()
size_t strlcpy( char *dst, const char *src, size_t siz);
as strncpy wont terminate the string with NULL,
strlcpy() solves this issue.
strings copied by strlcpy are always NULL terminated.
Allocate proper memory to the variable data[counter]. In your case counter is set to 0. Hence it will give segmentation fault if you try to access data[1] etc.
Declaring a variable like data[counter] is a bad practice. Even if counter changes in the subsequent flow of the program it wont be useful to allocate memory to the array data.
Hence use a double char pointer as stated above.
You can use your existing loop to find the number of lines first.
The last printf is wrong. You will be printing just the first line with it.
Iterate over the loop once you fix the above issue.
Change
int counter = 0;
char* data[counter];
...
int len=strlen(data);
...
for(; i<pos; i++)
...
strncpy(data[counter], bytes+start, length);
...
to
int counter = 0;
#define MAX_DATA_LINES 1024
char* data[MAX_DATA_LINES]; //1
...
for(; i<pos && counter < MAX_DATA_LINES ; i++) //2
...
strncpy(data[counter], bytes+start, length);
...
//1: to prepare valid memory storage for pointers to lines (e.g. data[0] to data[MAX_DATA_LINES]). Without doing this, you may hit into 'segmentation fault' error, if you do not, you are lucky.
//2: Just to ensure that if the total number of lines in the file are < MAX_DATA_LINES. You do not run into 'segmentation fault' error, because the memory storage for pointer to line data[>MAX_DATA_LINES] is no more valid.
I think that this might be a quicker implementation as you won't have to copy the contents of all the strings from the bytes array to a secondary array. You will of course lose your '\n' characters though.
It also takes into account files that don't end with a new line character and as pos is defined as long the array index used for bytes[] and also the length should be long.
#include <stdio.h>
#include <stdlib.h>
#define DEFAULT_LINE_ARRAY_DIM 100
int main(int argc, char* argv[])
{
FILE *f = fopen("test.c", "rb");
fseek(f, 0, SEEK_END);
long pos = ftell(f);
fseek(f, 0, SEEK_SET);
char *bytes = malloc(pos+1); /* include an extra byte incase file isn't '\n' terminated */
fread(bytes, pos, 1, f);
if (bytes[pos-1]!='\n')
{
bytes[pos++] = '\n';
}
long i;
long length = 0;
int counter = 0;
size_t size=DEFAULT_LINE_ARRAY_DIM;
char** data=malloc(size*sizeof(char*));
data[0]=bytes;
for(i=0; i<pos; i++)
{
if (bytes[i]=='\n') {
bytes[i]='\0';
counter++;
if (counter>=size) {
size+=DEFAULT_LINE_ARRAY_DIM;
data=realloc(data,size*sizeof(char*));
if (data==NULL) {
fprintf(stderr,"Couldn't allocate enough memory!\n");
exit(1);
}
}
data[counter]=&bytes[i+1];
length = data[counter] - data[counter - 1] - 1;
printf("%d\n", counter);
printf("%ld\n", length);
}
}
for (i=0;i<counter;i++)
printf("%s\n", data[i]);
return 0;
}