Dynamic arrays with an embedded meta-information struct - c

I am fiddling around with an implementation of a generic dynamic array. The array should hold information about its size, how many entries are used, and then hold the actual data. The meta-information (size/used) is generic, but the data needs to handle different types, so I am handling that with macros. I am trying, however, to get the memory allocation code into functions. So my thought it is: I have a struct for meta-information
struct da_meta {
size_t size;
size_t used;
};
and then I have a macro that creates a struct per type, using a flexible array following the meta information:
#define dynarray(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
I can declare an integer array, for example, as
dynarray(int) *int_array = 0;
To allocate and reallocate arrays, my thought was now to use code such as this:
#define size_overflow(meta_size, obj_size, len) \
((SIZE_MAX - meta_size) / obj_size < len)
// Always free if we cannot reallocate
void *realloc_dynarray_mem(void *p,
size_t meta_size,
size_t obj_size,
size_t new_len)
{
if (size_overflow(meta_size, obj_size, new_len))
goto abort;
struct da_meta *new_da =
realloc(p, meta_size + obj_size * new_len);
if (!new_da) goto abort;
new_da->size = new_len;
new_da->used = MIN(new_da->used, new_len);
return new_da;
abort:
free(p);
return 0;
}
The function gets the size of the struct sans the data objects, the size of individual objects, and the number of objects to allocate memory for. I don't use the size of the struct meta type, because it might be too small depending on the alignment of the data objects, but I will get it from sizeof the concrete (typed) structures. The function will always free the input and return NULL if I cannot allocate because in my application I have to give up if I cannot grow the array, so I don't try to preserve the old data in case there is an error.
There is nothing wrong with this code, as far as I can tell. I can always allocate memory, and as long as I have more than the size of struct meta, I can set the variables there. But when I return the result and use it as a dynarray(T) type, I am less sure. I think it should work, because C should put the memory of the first member of a struct first in a struct, and that is where I put struct meta, but am I right here?
I create a new array like this:
void *new_dynarray_mem(size_t meta_size,
size_t obj_size,
size_t len)
{
struct da_meta *array =
realloc_dynarray_mem(0, meta_size, obj_size, len);
if (array) {
// we do set size in realloc, but
array->size = len;
// if used was not initialised in realloc (and it wasn't)
// then we have to set it here...
array->used = 0;
}
return array;
}
#define new_da(type, init_size) \
new_dynarray_mem(sizeof(dynarray(type)), \
sizeof(type), init_size)
Here, the macro new_da() gets the size of the header/meta information from sizeof(dynarray(type)) and the size of the underlying types from sizeof(type). The second value is fine, but I am also uncertain about the first. Does the C standard guarantee that if I create two different structs with exactly the same code, e.g., calling dynarray(int) twice, that I get the same memory layout? I cannot imagine a compiler that would give me a different layout for the same code, but when it comes to imagining what compilers get up to, I am quite limited.
For appending to the array, I think all is fine. There I do not generate new types but get the size from the existing dynamic array, so if the first allocation is standard compliant, then I think the appending is as well, but I could be wrong.
#define da_free(da) \
do { free(da); da = 0; } while(0)
#define grow(size) \
(((size) == 0) ? /* special case for zero */ \
1 : \
((size) > SIZE_MAX / 2) ? /* can we grow? */ \
0 : /* no, then report size zero */ \
(2 * (size))) /* double the size */
#define da_append(da, ...) \
do { \
if (da->meta.used == da->meta.size) { \
size_t new_size = grow(da->meta.size); \
if (new_size == 0) { da_free(da); break; } \
da = realloc_dynarray_mem( \
da, sizeof *da, *da->data, new_size \
); \
if (!da) break; \
} \
da->data[da->meta.used++] = __VA_ARGS__; \
} while (0)
Am I guaranteed that if I lay out the concrete dynamic arrays with the meta-information at the top of the structs, then I can treat the allocate memory as both a pointer to the meta-information and the array? Is it safe to assume that I get the same size and memory layout if I generate the same struct twice? I feel that it must be that way since it shouldn't differ from if I include the same header file twice, but since I am generating the code there might be something that I am missing.
EDIT Based on the comments, I have updated the code to that below, but I have left the original code (of course) so the comments make sense in terms of that.
#define da_at(da,i) (da->data[(i)])
#define da_len(da) (da->meta.used)
struct da_meta {
size_t size;
size_t used;
};
#define dynarr(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
// Always free if we cannot reallocate
void *realloc_dynarray_mem(struct da_meta *p,
size_t meta_size,
size_t obj_size,
size_t new_len)
{
// Size size overflow?
if (((SIZE_MAX - meta_size) / obj_size < new_len))
goto fail;
struct da_meta *new_da =
realloc(p, meta_size + obj_size * new_len);
if (!new_da) goto fail;
new_da->size = new_len;
new_da->used = MIN(new_da->used, new_len);
return new_da;
fail:
free(p);
return 0;
}
void *new_dynarray_mem(size_t meta_size,
size_t obj_size,
size_t len)
{
struct da_meta *array =
realloc_dynarray_mem(0, meta_size, obj_size, len);
if (array) array->used = 0;
return array;
}
void *grow_dynarray_mem(struct da_meta *p,
size_t meta_size,
size_t obj_size)
{
// Can we double the length?
size_t used = meta_size - obj_size * p->size;
size_t adding = MAX(1, p->size);
if ((SIZE_MAX - used) / obj_size < adding) {
free(p);
return 0;
}
return realloc_dynarray_mem(
p, meta_size, obj_size, p->size + adding
);
}
#define new_da(da, init_size) \
new_dynarray_mem(sizeof *(da), \
sizeof *(da)->data, \
(init_size))
#define da_free(da) \
do { free(da); da = 0; } while(0)
#define da_append(da, ...) \
do { \
if (da->meta.used == da->meta.size) { \
da = grow_dynarray_mem( \
(struct da_meta *)da, \
sizeof *da, sizeof *da->data \
); \
if (!da) break; \
} \
da->data[da->meta.used++] = __VA_ARGS__; \
} while (0)
When used, the code can look like this:
int main(void)
{
dynarr(int) *int_array = new_da(int_array, 0);
if (!int_array) goto error;
printf("%zu out of %zu\n",
int_array->meta.used,
int_array->meta.size);
for (int i = 0; i < 5; i++) {
da_append(int_array, i);
if (!int_array) goto error;
}
for (int i = 0; i < da_len(int_array); i++) {
printf("%d ", da_at(int_array, i));
}
printf("\n");
da_free(int_array);
return 0;
error:
return 1;
}

Just remember about padding between between meta and the start of the array and about alignment requirements and you should be fine.
because C should put the memory of the first member of a struct first in a struct, and that is where I put struct meta, but am I right here?
Yes.
Am I guaranteed that if I lay out the concrete dynamic arrays with the meta-information at the top of the structs, then I can treat the allocate memory as both a pointer to the meta-information
Yes, and...
and the array?
No. The array starts at address after meta + padding. So at address (char*)da + sizeof(dynarray(TYPE)) or just da->data.
Is it safe to assume that I get the same size and memory layout if I generate the same struct twice?
No and yes. There are many other great stackoverflow questions and answers about that topic - research them. Pragmatically yes, it would be a strange compiler that would would generate different padding for the same looking struct, but technically that's allowed.
using a flexible array
Unless you have specific aim, then I would just advise not to use them. It makes it harder for you to write the code. It makes it very hard to create and manage an array of such arrays.
goto abort;
What an unfortunate name for a goto label - abort() is a standard function.
#define grow(size)
Please use a prefix to all your library functions, especially macros. Defining such macro will make it impossible to use it in other code that happens to use a different grow() function. da_ seems like a good prefix.
I guess *da->data in realloc_dynarray_mem should be sizeof(*da->data).
#edit

I would suggest to use typeof keyword in new_da(). This would avoid specifying the type twice: in dynarray(TYPE) and in new_da(type, init_size). To make it, instead of passing the type, just pass the pointer on the dynamic array:
#define new_da(da, init_size) \
(da) = new_dynarray_mem(sizeof(dynarray(typeof(*(da)))), \
sizeof(typeof((da)->data[0])), (init_size))
Hence, this would avoid the mistake where the type used in the definition would differ from the type used in the allocation:
dynarray(int) *pInt;
pInt = new_da(char, 1024);
UPDATE FROM DISCUSSION IN COMMENTS:
And what about a single macro to define and initialize ?
#define new_da(da, type, init_size) \
dynarray(type) *da = new_dynarray_mem(sizeof(dynarray(type)), sizeof(type), init_size)

Related

Bitwise operation with unsigned long

memset_pg.h
#include <stdint.h>
#include<stdio.h>
#include<string.h>
#define LONG_ALIGN_MASK (sizeof(long) - 1)
typedef size_t Size;
#define MEMSET_LOOP_LIMIT 1024
/*
* MemSet
* Exactly the same as standard library function memset(), but considerably
* faster for zeroing small word-aligned structures (such as parsetree nodes).
* This has to be a macro because the main point is to avoid function-call
* overhead. However, we have also found that the loop is faster than
* native libc memset() on some platforms, even those with assembler
* memset() functions. More research needs to be done, perhaps with
* MEMSET_LOOP_LIMIT tests in configure.
*/
#define MemSet(start, val, len) \
do \
{ \
/* must be void* because we don't know if it is integer aligned yet */ \
void *_vstart = (void *) (start); \
int _val = (val); \
Size _len = (len); \
\
printf("_vstart: %lu\n",(uintptr_t) _vstart); \
if ((((uintptr_t) _vstart) & LONG_ALIGN_MASK) == 0 && \
(_len & LONG_ALIGN_MASK) == 0 && \
_val == 0 && \
_len <= MEMSET_LOOP_LIMIT && \
/* \
* If MEMSET_LOOP_LIMIT == 0, optimizer should find \
* the whole "if" false at compile time. \
*/ \
MEMSET_LOOP_LIMIT != 0) \
{ \
long *_start = (long *) _vstart; \
long *_stop = (long *) ((char *) _start + _len); \
while (_start < _stop) \
*_start++ = 0; \
printf("non-standard MemSet invoked\n"); \
} \
else { \
memset(_vstart, _val, _len); \
printf("standard memset invoked\n"); \
} \
} while (0)
#define TEST "test"
memset_pg.c
/*
gcc -Wall -Werror memset_pg.c && ./a.out
*/
#include "memset_pg.h"
#include<stdio.h>
#include<inttypes.h>
#include<assert.h>
int main(void)
{
printf("LONG_ALIGN_MASK:%ld\n",LONG_ALIGN_MASK);
// char str[] = "beautiful earth";
char str[] = "earth567";
printf("strlen=%ld\n",strlen(str));
MemSet(str,0,strlen(str));
printf("via MemSet: str return |%s|\n",str);
printf("str pointer:%ld\n", (uintptr_t)str);
return 0;
}
I am not sure this part ((uintptr_t) _vstart) & LONG_ALIGN_MASK mean. It means at least the pointer cast to unsign long ending 3 bit should be 000. But I don't know the pattern mean.
typedef struct POD_OnlyStruct{
int a;
int b;
char d;
}POD_OnlyStruct;
POD_OnlyStruct t;
MemSet(&t,0, sizeof t);
the above will not invoke non-standard memset.
However, the following will invoke the non-standard memset.
typedef struct POD_OnlyStruct{
int a;
int b;
int c;
char d;
}POD_OnlyStruct;
POD_OnlyStruct t;
MemSet(&t,0, sizeof t);
(_len & LONG_ALIGN_MASK) == 0 means that the _len is power of 8.
In long *_stop = (long *) ((char *) _start + _len); I am not sure the usage of (char *).
I am not sure this part ((uintptr_t) _vstart) & LONG_ALIGN_MASK mean.
_vstart Is a void pointer. By casting it to a uintptr_t it becomes a number we can work with, this suppresses an error for the next operation. By doing the & LONG_ALIGN_MASK we check if this pointer is aligned to some boundry. According to the rest of your post, we check if the last three digits are zero.
The guiding text tells you why to do it. To me (purely opinion here) it needs to have a massive advantage over the memset in the libraries to be worth it, because the code is hard to read.
Edit: A new question was added:
In long *_stop = (long *) ((char *) _start + _len); I am not sure the usage of (char *)
Pointer arithmetic! A char is by definition 1 byte, but a long can be a couple more. Say we do long* a = ((long*)NULL) + 1, we now see that a = sizeof(long)/sizeof(char) = sizeof(long). This is just how pointer arithmetic works, adding one to some pointer will actually add the size of the type of the pointer to it. This is very useful when, for example, traversing an array via a pointer, plus 1 will always go to the start of the next element (given that you started at the start of some element).
So the cast to char* here makes sure that we are adding _len to _start, and not _len*sizeof(long). This usage, by the way, means that _len has to be an uintptr_t and not a size_t. A size_t is defined as the maximum array index, while uintptr_t is guaranteed to be able to contain any pointer. On most systems this does not matter (max array index == UINTPTR_MAX, usually), but technically this is an issue.
Note that casting _vstart to uintptr_t instead of (char*) would have had the same effect and maybe be more readable.
I am not sure this part ((uintptr_t) _vstart) & LONG_ALIGN_MASK mean.
This is to check whether the start address has the same alignment as a long, because if it is not, then the expression (long *) _vstart has undefined behaviour.
Note that nowadays compilers know that memset() clears memory, and will actually inline it if they see you are only setting a small amount of memory. So this MemSet() macro is completely unnecessary. In fact, some compilers might even see that the while-loop in that code is equivalent to a memset(), and replace it with a function call if they think that is more efficient (note that compilers can be told to optimize for size over performance).
In long *_stop = (long *) ((char *) _start + _len); I am not sure the usage of (char *).
This is because _start is a pointer to long. If you add _len to that, it would advance it _len times the size of long. To make sure it just adds _len bytes, you need to cast it to char * first. Also remember that ptr + offset is equivalent to &ptr[offset].

Why do C macros act as pointers even though I am not passing variables by pointer

I am trying to write a series of C macros to provide some generic data type capability for a struct, or grouping of structs that manages a dynamically allocated array. At this point I have written several structs for each data type and am just starting to write a Macro function that can initialize the struct variables and allocated a user defined amount of memory. The idea is to create one init function that can initialize any type of struct, so as long as it has the write form. For this implementation I first have to instantiate a copy of the struct, which is then passed to the macro. The macro ensures that the variables allocated_length and active_length is of type size_t. Then it determines the type of array by de-referencing it and using thetypeof operator. Finally it allocates memory to ptr and then checks to ensure the allocation was not returned as NULL. However, in this example, I do not pass anything back to the main program, and I am not passing variables as a pointer. Yet, somehow, the struct in the structs in the main program are able to see the modifications I made in the Macro. How is this?
#define init_vector(vec_struct, aloc_length) ({size_t _a = (aloc_length); \
size_t _b = 0; \
typeof(vec_struct.array) ptr = malloc(aloc_length * sizeof(&vec_struct.array)); \
if (ptr == NULL) { \
perror("WARNING: "); \
exit(0); \
} \
vec_struct.allocated_length = _a; \
vec_struct.active_length = _b; \
vec_struct.array = ptr; \
})
typedef struct
{
int *array;
size_t allocated_length;
size_t active_length;
} IntVector;
typedef struct
{
float *array;
size_t allocated_length;
size_t active_length;
} FltVector;
int main() {
IntVector int_vec;
init_vector(int_vec, 30);
printf("%ld\n", int_vec.allocated_length);
FltVector float_vec;
init_vector(float_vec, 20);
printf("%ld\n", float_vec.allocated_length);
return 0;
}
You need to understand that macros are not functions. They simply replace text (or more precise tokens) before the actual C compilation starts.
The compiler will compile this code:
int main() {
IntVector int_vec;
({size_t _a = (30); size_t _b = 0; typeof(int_vec.array) ptr = malloc(30 * sizeof(&int_vec.array)); if (ptr ==
# 32 "/app/example.c" 3 4
((void *)0)
# 32 "/app/example.c"
) { perror("WARNING: "); exit(0); } int_vec.allocated_length = _a; int_vec.active_length = _b; int_vec.array = ptr; });
printf("%ld\n", int_vec.allocated_length);
FltVector float_vec;
({size_t _a = (20); size_t _b = 0; typeof(float_vec.array) ptr = malloc(20 * sizeof(&float_vec.array)); if (ptr ==
# 36 "/app/example.c" 3 4
((void *)0)
# 36 "/app/example.c"
) { perror("WARNING: "); exit(0); } float_vec.allocated_length = _a; float_vec.active_length = _b; float_vec.array = ptr; });
printf("%ld\n", float_vec.allocated_length);
return 0;
}
https://godbolt.org/z/ezvKfdn33
Is it something you have expected?
Macros have to be used with great caution and as little as possible.
From https://en.cppreference.com/w/c/preprocessor/replace:
Function-like macros
#define identifier( parameters ) replacement-list
Function-like macros replace each occurrence of a defined identifier with replacement-list, additionally taking a number of arguments, which then replace corresponding occurrences of any of the parameters in the replacement-list.
The syntax of a function-like macro invocation is similar to the syntax of a function call: each instance of the macro name followed by a ( as the next preprocessing token introduces the sequence of tokens that is replaced by the replacement-list. The sequence is terminated by the matching ) token, skipping intervening matched pairs of left and right parentheses.
...
That means (based on your example) that every occurrence of the identifier init_vector is replaced with the code after the last parentheses of the parameter list.
And each occurrence of the parameters vec_struct, aloc_length will also replaced accordingly.
At the end, it is not about functions and function calls, but replacement.
"The preprocessor supports text macro replacement and function-like text macro replacement."

Variable sized dereferencing in c

I have a Destination pointer stored in void* DestPointer and a source pointer stored in void* SrcPointer. Assume the destination pointer, points to starting adress of the free and safe memory and source pointer points to the starting adress of memory containing data it's type may be int16_t or int32_t) but can be interfered with counter i. Now I want to fill the memory by :
*(uint16_t*)DestPointer=*(uint16_t*) SrcPointer
I want this uint8_t change according to i, it could be a bounus if the stated problem generalizer to something like.
i==-1 -> int8_t
i==-2 -> int16_t
i==-4 -> int32_t
i==1 -> uint8_t
i==2 -> uint16_t
i==4 -> uint32_t
How to implement it? I just know enumeration can't work maybe some macro or magical c expression help.
It seems I need to implement memcpy() in a specific way!
Those strange indices aren't very helpful - it would have been much easier with adjacent numbers 0,1,2... So the best thing to do is to get rid of those negative numbers. Another good option would be to pick the copy size based on type with _Generic.
Otherwise assuming you can't change those index numbers... When dealing with bad interfaces that you are stuck with and can't change, X-macros might often be an ok solution. Something like this:
#define INT_TYPES(X) \
/* type val */ \
X(int8_t, -1) \
X(int16_t, -2) \
X(int32_t, -4) \
X(uint8_t, 1) \
X(uint16_t, 2) \
X(uint32_t, 4) \
Then create an enum eint8_t = -1, eint16_t = -2 and so on:
typedef enum
{
#define INT_TYPE_ENUM(type, val) e##type = (val),
INT_TYPES(INT_TYPE_ENUM)
} int_t;
Then a look-up table to compensate for the negative indices. This is using designated initializers so we get for example index [4 + -4] = sizeof(int32_t):
#define lookup_offset 4
const size_t int_type_lookup[] =
{
#define INT_TYPE_SIZE(type, val) [lookup_offset+(val)] = sizeof(type),
INT_TYPES(INT_TYPE_SIZE)
};
(Similarly you could make a function pointer look-up table which might be handy in some cases.)
The copy function which is a wrapper around memcpy:
void intcpy (void* restrict dst,
const void* restrict src,
size_t n,
int_t type)
{
memcpy(dst,src, n*int_type_lookup[lookup_offset + type]);
}
Complete example:
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#define INT_TYPES(X) \
/* type val */ \
X(int8_t, -1) \
X(int16_t, -2) \
X(int32_t, -4) \
X(uint8_t, 1) \
X(uint16_t, 2) \
X(uint32_t, 4) \
typedef enum
{
#define INT_TYPE_ENUM(type, val) e##type = (val),
INT_TYPES(INT_TYPE_ENUM)
} int_t;
void intcpy (void* restrict dst,
const void* restrict src,
size_t n,
int_t type)
{
#define lookup_offset 4
const size_t int_type_lookup[] =
{
#define INT_TYPE_SIZE(type, val) [lookup_offset+(val)] = sizeof(type),
INT_TYPES(INT_TYPE_SIZE)
};
memcpy(dst,src, n*int_type_lookup[lookup_offset + type]);
}
int main (void)
{
int16_t some_data [50]={1,2,3,4,5};
int16_t dst[50];
intcpy(dst, some_data, 50, eint16_t);
for(size_t i=0; i<5; i++)
{
printf("%d ", (int)dst[i]);
}
return 0;
}

Is it possible to create generic functions in C?

I am getting back into using C, but I've been spoiled by generics in other languages. I have made it to the following piece of code in my implementation of a resizable array:
typdef struct {
void** array;
int length;
int capacity;
size_t type_size;
} Vector;
void vector_add(Vector* v, void* entry) {
// ... code for adding to the array and resizing
}
int main() {
Vector* vector = vector_create(5, sizeof(int));
vector_add(vector, 4); // This is erroneous...
// ...
}
In my attempt to make this generic, I'm now unable to add an integer to the vector without storing it in memory somewhere else.
Is there any way to make this work (either as is, or possibly a better approach to generics)?
For my answer I am assuming that you are not familiar with the sections of memory (ie the use of the memory pool).
In my attempt to make this generic, I'm now unable to add an integer to the vector without storing it in memory somewhere else.
If you want to create a generic structure (as you did) then you will need to use void pointers. Consequently, from the use of void pointers you will need to store the values for each field on the memory pool, or uncommonly on the stack. Note, the structure is composed of void pointers and hence only memory addresses are contained within the structure, pointing to other locations in memory where the values are.
Be careful if you declare them on the stack as once your stack frame is popped from the call stack those memory addresses are not considered to be valid and hence may be used by another stack frame (overwriting your existing values within that collection of memory addresses).
Aside: If you migrate to C++ then you can consider the use of C++ templates.
Yes; you can embrace Greenspun's Tenth Rule and develop a full blown dynamic language in C, and in the process, develop a relatively clean C run time that can be used from within C.
In this project I did just that, as have others before me.
In the C run time of this project, a generic number would be created from a C number like this:
val n = num(42);
because of the way val is represented, it takes up only a machine word. A few bits of type tag are used to distinguish a number from a pointer, from a character, etc.
There is also this:
val n = num_fast(42);
which is much faster (a bit manipulation macro) because it doesn't do any special checks that the number 42 fits into the "fixnum" range; it's used for small integers.
A function that adds its argument to every element of a vector could be written (very inefficiently) like this:
val vector_add(val vec, val delta)
{
val iter;
for (iter = zero; lt(iter, length(vec)); iter = plus(iter, one)) {
val *pelem = vecref_l(vec, iter);
*pelem = plus(*pelem, delta);
}
return nil;
}
Since plus is generic, this will work with fixnums, bignums and reals, as well as with characters, since it is possible to add integer displacements to characters via plus.
Type mismatch errors will be caught by the lower level functions and turned into exceptions. For instance if vec isn't something to which length can be applied, length will throw.
Functions with a _l suffix return a location. Wherease vecref(v, i) returns the value at offset i in vector v, vecref_l(v, i) returns a pointer to the val typed location in the vector which stores that value.
It's all C, just with the ISO C rules bent a little bit: you can't make a type like val efficiently in strictly conforming C, but you can do it quite portably to architectures and compilers you care about supporting.
Our vector_add isn't generic enough. It's possible to do better:
val sequence_add(val vec, val delta)
{
val iter;
for (iter = zero; lt(iter, length(vec)); iter = plus(iter, one)) {
val elem = ref(vec, iter);
refset(vec, iter, plus(elem, delta));
}
return nil;
}
By using the generic ref and refset, this now works with lists and strings also, not only vectors. We can do something like:
val str = string(L"abcd");
sequence_add(str, num(2));
The contents of str will change to cdef since a displacement of 2 is added to each character, in place.
Your idea can be done:
int *new_int = (int*)malloc(sizeof(int));
*new_int = 4;
vector_add(vector, new_int);
Naturally, it would be a good idea to do a int *create_int(int x) function or something similar:
int *create_int(int x)
{
int *n = (int*)malloc(sizeof(int));
*n = 4;
return n;
}
//...
vector_add(vector, create_int(4));
If your environment allows it you may consider using a well tested, widely used library that already manages all that, such as Glib. Or even C++.
You can avoid having many many small allocations by storing the data instead of pointers to it, like
typedef struct {
char* array;
int length;
int capacity;
size_t type_size;
} Vector;
bool vector_add(Vector* v, void* entry)
{
if (v->length < v->capacity || vector_expand(v)) {
char* location = v->array + (v->length++)*(v->type_size);
memcpy(location, entry, v->type_size);
return 1;
}
return 0; // didn't fit
}
int main()
{
Vector* vector = vector_create(5, sizeof(int));
int value = 4;
vector_add(vector, &value); // pointer to local is ok because the pointer isn't stored, only used for memcpy
}
Yes, here's an implementation of mine (similar to yours) that may help. It uses macros that can be wrapped with function calls for immediate values.
#ifndef VECTOR_H
# define VECTOR_H
# include <stddef.h>
# include <string.h>
# define VECTOR_HEADROOM 4
/* A simple library for dynamic
* string/array manipulation
*
* Written by: Taylor Holberton
* During: July 2013
*/
struct vector {
void * data;
size_t size, len;
size_t headroom;
};
int vector_init (struct vector *);
size_t vector_addc (struct vector *, int index, char c);
size_t vector_subc (struct vector *, int index);
// these ones are just for strings (I haven't yet generalized them)
size_t vector_adds (struct vector *, int index, int iend, const char * c);
size_t vector_subs (struct vector *, int ibegin, int iend);
size_t vector_addi (struct vector *, int index, int i);
size_t vector_subi (struct vector *, int index);
# define vector_addm(v, index, datatype, element) \
do { \
if (!v) return 0; \
\
if (!v->size){ \
v->data = calloc (v->headroom, sizeof (datatype)); \
v->size = v->headroom; \
} \
\
datatype * p = v->data; \
\
if (v->len >= (v->size - 2)){ \
v->data = realloc (v->data, \
(v->size + v->headroom) * sizeof (datatype)); \
p = v->data; \
memset (&p[v->size], 0, v->headroom * sizeof(datatype));\
v->size += v->headroom; \
} \
\
if ((index < 0) || (index > v->len)){ \
index = v->len; \
} \
\
for (int i = v->len; i >= index; i--){ \
p[i + 1] = p[i]; \
} \
\
p[index] = element; \
\
v->len++; \
\
} while (0)
# define vector_subm(v, index, datatype) \
do { \
if (!v || !v->len){ \
return 0; \
} \
\
if ((index < 0) || (index > (v->len - 1))){ \
index = v->len - 1; \
} \
\
datatype * p = v->data; \
\
for (int i = index; i < v->len; i++){ \
p[i] = p[i + 1]; \
} \
\
v->len--; \
\
if ((v->size - v->len) > v->headroom){ \
v->data = realloc (v->data, ((v->size - v->headroom) + 1) * sizeof (datatype));\
v->size -= v->headroom; \
} \
\
} while (0)
#endif
And I usually wrap them like:
size_t vector_addi (struct vector * v, int index, int i){
vector_addm (v, index, int, i);
return v->len;
}
I haven't had this code-reviewed, but I've been using it in a large program I'm writing and I haven't had any memory errors from them (using valgrind).
The only thing that is really missing (I've been meaning to add) the ability to add and subtract arrays from arrays.
Edit: I believe you can also do this same sort of thing with stdarg.h, but I've never tried it.
You asked for a better approach? Here ist is: https://github.com/m-e-leypold/glitzersachen-demos/tree/master/generix/v0-2011 (Disclosure: This is my code).
Let me explain very shortly:
I wanted type safe generic containers (which in other languages would be provided by proper generics (Ada) or parametric polymorphism (OCaml). This is the the feature that is most missing in C.
Macros just cannot do it (I'm not
going to explain that in detail. Suffice to say: The result of a template expansion or
generic instantiation should be a module in it's own right: In C this means, there are pre
processor symbols exported respectively can be used for module configuration (like
-DUSE_PROCESS_QUEUE_DEBUGCODE) you couldn't do that if you used C macros to generate
instances.
I'm abstracting over element type by moving element size and all relevant operation into a descriptive structure. This will be passed to every invocation of the generic code. Note that the descriptor describes the element type, so a descriptor instance will be needed once per generic instance.
I'm using a template processor to create a thin type safe frontend module to the generic code.
Example:
This is the prototype for the generic code to retrieve an element:
void fifo_get ( fifo_DESCRIPTOR* inst, fifo* , void* var );
This is the descriptor type:
typedef struct fifo_DESCRIPTOR {
size_t maxindex;
size_t element_size;
} fifo_DESCRIPTOR;
This is the template code in the type safe wrapper template:
<<eT>> <<>>get ( <<T>>* f ) {
<<eT>> e; fifo_get( &DESCRIPTOR, (fifo*) f, (void*) &e ); return e;
}
And this is what the template expander (instantiating an generic) produces from the template:
float floatq_get ( floatq* f ) {
float e; fifo_get( &DESCRIPTOR, (fifo*) f, (void*) &e ); return e;
}
All this has a nice make integration, but hardly any type safety in instantiation. Every error only crops up when compiling with cc.
I cannot justify at the moment, why to stick with source text templates in C instead of migrating to C++. For me, it was just an experiment.
Regards.
This approach will probably horrify you, but it can be made to work if you don't need any type-specialized logic:
// vector.h
#ifndef VECTOR_H
#define VECTOR_H
#define VECTOR_IMP(itemType) \
typedef struct { \
itemType * array; \
int length; \
int capacity; \
} itemType##_Vector; \
\
static inline void itemType##_vector_add(itemType##_Vector* v, itemType v) { \
// implementation of adding an itemType object to the array goes here \
} \
\
[... other static-inline generic vector methods would go here ...] \
// Now we can "instantiate" versions of the Vector struct and methods for
// whatever types we want to use.
VECTOR_IMP(int);
VECTOR_IMP(float);
VECTOR_IMP(char);
#endif
... and some example calling code:
#include "vector.h"
int main(int argc, char ** argv)
{
float_Vector fv = {0};
int_Vector iv = {0};
char_Vector cv = {0};
int_vector_add(&iv, 5);
float_vector_add(&fv, 3.14f);
char_vector_add(&cv, 'A');
return 0;
}
Instead of having the vector class store the added object, you could just return a pointer to the location where the caller can store it:
typdef struct {
char *buffer;
size_t length;
size_t capacity;
size_t type_size;
} Vector;
void *vector_add(Vector* v)
{
if (v->length == v->capacity) {
// ... increase capacity by at least one
// ... realloc buffer to capacity * type_size
}
return v->buffer + v->type_size * v->length++;
}
// in main:
*(int*)vector_add(v) = 4;
Using some non-standard GNU C extensions, it is possible to define generic functions with inferred parameter types. This macro defines a nested function in a statement expression and infers the parameter type using typeof:
#include <stdio.h>
#define fib(n1) ({\
typeof(n1) func(typeof(n1) n){\
if (n <= 1)\
return n;\
return func(n-1) + func(n-2);\
}\
func(n1);\
})
int main()
{
printf("%d\n",fib(3));
printf("%f\n",fib(3.0));
return 0;
}

Manage almost identical data structures in C

With the following (simplified) data definition :
#define DIM0 10
#define DIM1 15
typedef struct {
uint32_t var1:
...
int8_t arrayVar1[DIM0];
} dataClass0;
typedef struct {
uint32_t var1:
...
int8_t arrayVar1[DIM1];
} dataClass1;
At one given point I must create an array of these structures and process them.
The processing is exactly the same except for the arrays (different length). Right now it's something like:
dataClass0 *data;
data = (dataClass0 *) malloc(dimension * sizeof (dataClass0));
// Processing and filling structure
data[i].var1 = <value>
...
Right now I have the same function duplicated for each data classs. Is there a way around duplicating code when using these data structures?
Notes:
Only pure C, no C++;
I cannot change the data definition (i.e. cannot use int8_t *arrayVar1 in the struct).
When processing I receive the type of data to process (0 for class0, 1 for class1, ...).
typedef struct {
uint32_t var1:
...
int8_t arrayVar[]; /* Declare as flexible array, allowed since C99 */
} dataClass;
allocate with something like that:
data1 = malloc(sizeof (dataCLass) + DIM1*sizeof ((dataClass*)NULL)->arrayVar[0]);
data2 = malloc(sizeof (dataCLass) + DIM2*sizeof ((dataClass*)NULL)->arrayVar[0]);
or define
#define ALLOCDATA(dim) malloc(sizeof (dataCLass) + (dim)*sizeof ((dataClass*)NULL)->arrayVar[0]);
define
#define ELEMENT1(data, i) (dataClass*)(((char*)(data))+(i)*(DIM1+sizeof (dataCLass)))
#define ELEMENT2(data, i) (dataClass*)(((char*)(data))+(i)*(DIM2+sizeof (dataCLass)))
or if you parametrize the DIM
#define ELEMENT(data, i, dim) (dataClass*)(((char*)(data))+(i)*((dim)+sizeof (dataCLass)))
enjoy
ELEMENT1(data1, i)->var1 = 1;
ELEMENT1(data1, i)->arrayVar1[9] = 4;
ELEMENT2(data2, i)->arrayVar1[14] = 4;
or
ELEMENT(data1, i, DIM1)->var1 = 1;
ELEMENT(data1, i, DIM1)->arrayVar1[9] = 4;
ELEMENT(data2, i, DIM2)->arrayVar1[14] = 4;
Not perfect, but not too weird a construct to not be usable.
EDIT:
The ELEMENT define should be changed to
#define ELEMENT1(data, i) (dataClass*)(((char*)(data))+(i)*(DIM1*sizeof ((dataClass*)NULL)->arrayVar[0]+sizeof (dataCLass)))
#define ELEMENT2(data, i) (dataClass*)(((char*)(data))+(i)*(DIM2*sizeof ((dataClass*)NULL)->arrayVar[0]+sizeof (dataCLass)))
#define ELEMENT(data, i, dim) (dataClass*)(((char*)(data))+(i)*((dim)*sizeof ((dataClass*)NULL)->arrayVar[0]+sizeof (dataCLass)))
with this change, your arrayVar field can be of any type and is not limited to elements of size 1.
Can't you just make the array dynamic? So that you create your structures with malloc() and then initialize some member to hold the size (and make sure it ends with an uint8_t * instead of an actual array, or use VLA's)?
It depends whether or not you want to fill the arrays with different values initially. Otherwise you can just have a macro that initializes both types of structures
#define STRUCTURE_INITIALIZER(VAR1, VAR2) { .var1 = (VAR1), .var2 = (VAR2) }
and use that as
dataClass0 data = STRUCTURE_INITIALIZER(31, 42);
your array components would then always be 0 initialized, regardless of their size.
To initialize an malloced array of your stuff:
dataClass0 *data = malloc(dimension * sizeof (dataClass0));
// Processing and filling structure
for (size_t i = 0; i < dimension; ++i)
data[i]= (dataClass0)STRUCTURE_INITIALIZER(43, i);
BTW, prefer to initialize variables properly and don't cast the return of malloc.

Resources