Related
I am fiddling around with an implementation of a generic dynamic array. The array should hold information about its size, how many entries are used, and then hold the actual data. The meta-information (size/used) is generic, but the data needs to handle different types, so I am handling that with macros. I am trying, however, to get the memory allocation code into functions. So my thought is: I have a struct for meta-information
// Type-independent header placed at the start of every dynamic array.
struct da_meta {
size_t size; // number of element slots the allocation was sized for
size_t used; // number of entries in use
};
and then I have a macro that creates a struct per type, using a flexible array following the meta information:
// Expands to an anonymous struct type: a da_meta header followed by a
// flexible array member holding elements of TYPE.
#define dynarray(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
I can declare an integer array, for example, as
dynarray(int) *int_array = 0;
To allocate and reallocate arrays, my thought was now to use code such as this:
// True when meta_size + obj_size * len cannot be represented in a size_t.
// obj_size is used as a divisor and must therefore be non-zero.
// Every argument is parenthesised so compound expressions expand correctly.
#define size_overflow(meta_size, obj_size, len) \
    ((SIZE_MAX - (meta_size)) / (obj_size) < (len))
// Resize the allocation behind a dynamic array.
//
//   p         existing array, or NULL to create a fresh one
//   meta_size size in bytes of everything before the elements
//   obj_size  size in bytes of one element
//   new_len   number of elements to make room for
//
// Returns the (possibly moved) block with `size` set to new_len and `used`
// clamped to new_len; a fresh block gets `used` == 0.
// Always frees p and returns NULL if we cannot reallocate (size overflow,
// zero obj_size, or realloc failure).
void *realloc_dynarray_mem(void *p,
                           size_t meta_size,
                           size_t obj_size,
                           size_t new_len)
{
    // Remember this before realloc() possibly invalidates p.
    const int is_fresh = (p == NULL);

    // obj_size is the divisor of the overflow check, so reject zero first.
    if (obj_size == 0 || size_overflow(meta_size, obj_size, new_len))
        goto fail;

    struct da_meta *new_da = realloc(p, meta_size + obj_size * new_len);
    if (!new_da)
        goto fail;

    new_da->size = new_len;
    if (is_fresh) {
        // Nothing has ever been stored in the header of a fresh block, so
        // `used` must be set rather than read.
        new_da->used = 0;
    } else if (new_da->used > new_len) {
        // Shrinking below the number of stored entries drops the tail.
        new_da->used = new_len;
    }
    return new_da;

fail:
    free(p);
    return NULL;
}
The function gets the size of the struct sans the data objects, the size of individual objects, and the number of objects to allocate memory for. I don't use the size of the struct meta type, because it might be too small depending on the alignment of the data objects, but I will get it from sizeof the concrete (typed) structures. The function will always free the input and return NULL if I cannot allocate because in my application I have to give up if I cannot grow the array, so I don't try to preserve the old data in case there is an error.
There is nothing wrong with this code, as far as I can tell. I can always allocate memory, and as long as I have more than the size of struct meta, I can set the variables there. But when I return the result and use it as a dynarray(T) type, I am less sure. I think it should work, because C should put the memory of the first member of a struct first in a struct, and that is where I put struct meta, but am I right here?
I create a new array like this:
// Allocate a new dynamic array with room for `len` elements of obj_size
// bytes after a header of meta_size bytes. Returns NULL when the
// allocation fails; otherwise the header reports `len` slots, none used.
void *new_dynarray_mem(size_t meta_size,
                       size_t obj_size,
                       size_t len)
{
    struct da_meta *fresh = realloc_dynarray_mem(0, meta_size, obj_size, len);
    if (!fresh)
        return 0;

    // realloc_dynarray_mem() stores the size as well; it is repeated here
    // so both header fields are visibly initialised in one place.
    fresh->size = len;
    // A new array has no entries yet.
    fresh->used = 0;
    return fresh;
}
// Allocate a new dynamic array of `type` with room for init_size elements.
// The header size is taken from a fresh expansion of dynarray(type), the
// element size from sizeof(type).
#define new_da(type, init_size) \
new_dynarray_mem(sizeof(dynarray(type)), \
sizeof(type), init_size)
Here, the macro new_da() gets the size of the header/meta information from sizeof(dynarray(type)) and the size of the underlying types from sizeof(type). The second value is fine, but I am also uncertain about the first. Does the C standard guarantee that if I create two different structs with exactly the same code, e.g., calling dynarray(int) twice, that I get the same memory layout? I cannot imagine a compiler that would give me a different layout for the same code, but when it comes to imagining what compilers get up to, I am quite limited.
For appending to the array, I think all is fine. There I do not generate new types but get the size from the existing dynamic array, so if the first allocation is standard compliant, then I think the appending is as well, but I could be wrong.
// Free a dynamic array and reset the caller's pointer to null.
#define da_free(da) \
    do { \
        free(da); \
        (da) = 0; \
    } while (0)
// Next capacity for an array of `size` slots: 1 for an empty array, twice
// the size otherwise, or 0 when doubling would not fit in a size_t.
#define grow(size) \
    ((size) == 0 ? 1 : \
     (size) <= SIZE_MAX / 2 ? 2 * (size) : \
     0)
// Append a value to the array `da` points to, growing it first when full.
// `da` must be an lvalue: it is reassigned when the array moves and is
// left null when growing fails (check it after every append).
// Fix: the element size handed to realloc_dynarray_mem() is now
// sizeof *(da)->data instead of the value of the first element.
#define da_append(da, ...) \
    do { \
        if ((da)->meta.used == (da)->meta.size) { \
            size_t new_size = grow((da)->meta.size); \
            if (new_size == 0) { da_free(da); break; } \
            (da) = realloc_dynarray_mem( \
                (da), sizeof *(da), sizeof *(da)->data, new_size \
            ); \
            if (!(da)) break; \
        } \
        (da)->data[(da)->meta.used++] = __VA_ARGS__; \
    } while (0)
Am I guaranteed that if I lay out the concrete dynamic arrays with the meta-information at the top of the structs, then I can treat the allocate memory as both a pointer to the meta-information and the array? Is it safe to assume that I get the same size and memory layout if I generate the same struct twice? I feel that it must be that way since it shouldn't differ from if I include the same header file twice, but since I am generating the code there might be something that I am missing.
EDIT Based on the comments, I have updated the code to that below, but I have left the original code (of course) so the comments make sense in terms of that.
// da_at:  lvalue for element i of the array `da` points to.
// da_len: number of used entries in the array `da` points to.
// `da` is parenthesised so expressions such as *pp work as arguments.
#define da_at(da,i) ((da)->data[(i)])
#define da_len(da) ((da)->meta.used)
// Type-independent header placed at the start of every dynamic array.
struct da_meta {
size_t size; // number of element slots the allocation was sized for
size_t used; // number of entries in use
};
// Expands to an anonymous struct type: a da_meta header followed by a
// flexible array member holding elements of TYPE.
#define dynarr(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
// Resize the allocation behind a dynamic array.
//
//   p         existing array header, or NULL to create a fresh array
//   meta_size size in bytes of everything before the elements
//   obj_size  size in bytes of one element
//   new_len   number of elements to make room for
//
// Returns the (possibly moved) block with `size` set to new_len and `used`
// clamped to new_len; a fresh block gets `used` == 0.
// Always frees p and returns NULL if we cannot reallocate (size overflow,
// zero obj_size, or realloc failure).
void *realloc_dynarray_mem(struct da_meta *p,
                           size_t meta_size,
                           size_t obj_size,
                           size_t new_len)
{
    // Remember this before realloc() possibly invalidates p.
    const int is_fresh = (p == NULL);

    // Would meta_size + obj_size * new_len overflow? obj_size is the
    // divisor, so a zero element size is rejected first.
    if (obj_size == 0 || (SIZE_MAX - meta_size) / obj_size < new_len)
        goto fail;

    struct da_meta *new_da = realloc(p, meta_size + obj_size * new_len);
    if (!new_da)
        goto fail;

    new_da->size = new_len;
    if (is_fresh) {
        // Nothing has ever been stored in the header of a fresh block, so
        // `used` must be set rather than read.
        new_da->used = 0;
    } else if (new_da->used > new_len) {
        // Shrinking below the number of stored entries drops the tail.
        new_da->used = new_len;
    }
    return new_da;

fail:
    free(p);
    return NULL;
}
// Allocate a new dynamic array with room for `len` elements of obj_size
// bytes after a header of meta_size bytes. Returns NULL when the
// allocation fails; otherwise the array starts with no entries in use.
void *new_dynarray_mem(size_t meta_size,
                       size_t obj_size,
                       size_t len)
{
    struct da_meta *fresh = realloc_dynarray_mem(0, meta_size, obj_size, len);
    if (!fresh)
        return 0;
    fresh->used = 0;
    return fresh;
}
// Double the capacity of a dynamic array (an empty array grows to one slot).
//
//   p         current array header
//   meta_size size in bytes of everything before the elements
//   obj_size  size in bytes of one element
//
// Returns the resized block, or NULL on failure. On failure the array is
// freed, matching realloc_dynarray_mem(). A NULL p yields NULL.
//
// Fix: the previous check computed `meta_size - obj_size * p->size`, which
// wraps around as soon as the elements occupy more bytes than the header
// and then rejected perfectly valid growth. Only the element count is
// checked here; the byte-size overflow check is done by
// realloc_dynarray_mem().
void *grow_dynarray_mem(struct da_meta *p,
                        size_t meta_size,
                        size_t obj_size)
{
    if (!p)
        return 0;

    // Can we double the length?
    size_t adding = p->size ? p->size : 1;
    if (p->size > SIZE_MAX - adding) {
        free(p);
        return 0;
    }
    return realloc_dynarray_mem(p, meta_size, obj_size, p->size + adding);
}
// Allocate a new dynamic array sized for init_size elements. `da` is only
// used inside sizeof, to obtain the struct size and the element size from
// the pointer's own type; the result must be assigned by the caller.
#define new_da(da, init_size) \
new_dynarray_mem(sizeof *(da), \
sizeof *(da)->data, \
(init_size))
// Free a dynamic array and reset the caller's pointer to null.
#define da_free(da) \
    do { \
        free(da); \
        (da) = 0; \
    } while (0)
// Append a value to the array `da` points to, growing it first when every
// slot is in use. `da` must be an lvalue: it is reassigned when the array
// grows and is left null when growing fails.
#define da_append(da, ...) \
    do { \
        if ((da)->meta.used == (da)->meta.size) { \
            (da) = grow_dynarray_mem( \
                (struct da_meta *)(da), \
                sizeof *(da), \
                sizeof *(da)->data \
            ); \
            if (!(da)) \
                break; \
        } \
        (da)->data[(da)->meta.used++] = __VA_ARGS__; \
    } while (0)
When used, the code can look like this:
// Demo: create an empty int array, append 0..4, print the entries and free
// the array. Returns 0 on success and 1 when an allocation fails.
int main(void)
{
    dynarr(int) *int_array = new_da(int_array, 0);
    if (!int_array) goto error;

    printf("%zu out of %zu\n",
           int_array->meta.used,
           int_array->meta.size);

    for (int i = 0; i < 5; i++) {
        da_append(int_array, i);
        // da_append leaves the pointer null when growing fails.
        if (!int_array) goto error;
    }

    // da_len() yields a size_t, so the index is a size_t too; this avoids
    // comparing a signed int against an unsigned count.
    for (size_t i = 0; i < da_len(int_array); i++) {
        printf("%d ", da_at(int_array, i));
    }
    printf("\n");

    da_free(int_array);
    return 0;

error:
    return 1;
}
Just remember the padding between meta and the start of the array, and the alignment requirements, and you should be fine.
because C should put the memory of the first member of a struct first in a struct, and that is where I put struct meta, but am I right here?
Yes.
Am I guaranteed that if I lay out the concrete dynamic arrays with the meta-information at the top of the structs, then I can treat the allocate memory as both a pointer to the meta-information
Yes, and...
and the array?
No. The array starts at address after meta + padding. So at address (char*)da + sizeof(dynarray(TYPE)) or just da->data.
Is it safe to assume that I get the same size and memory layout if I generate the same struct twice?
No and yes. There are many other great Stack Overflow questions and answers about that topic - research them. Pragmatically yes: it would be a strange compiler that generated different padding for two identical-looking structs, but technically that's allowed.
using a flexible array
Unless you have specific aim, then I would just advise not to use them. It makes it harder for you to write the code. It makes it very hard to create and manage an array of such arrays.
goto abort;
What an unfortunate name for a goto label - abort() is a standard function.
#define grow(size)
Please use a prefix to all your library functions, especially macros. Defining such macro will make it impossible to use it in other code that happens to use a different grow() function. da_ seems like a good prefix.
I guess the *da->data argument passed to realloc_dynarray_mem() in da_append should be sizeof(*da->data).
#edit
I would suggest to use typeof keyword in new_da(). This would avoid specifying the type twice: in dynarray(TYPE) and in new_da(type, init_size). To make it, instead of passing the type, just pass the pointer on the dynamic array:
// Allocate the array that `da` points to and store the result in `da`.
// Both sizes come from the pointer's own type, so the element type cannot
// disagree with the declaration: sizeof *(da) is the struct up to the
// flexible array member and sizeof((da)->data[0]) is one element.
// (The earlier form wrapped typeof(*(da)) in another dynarray(), which made
// the whole struct the element type of a second, unrelated struct.)
#define new_da(da, init_size) \
    ((da) = new_dynarray_mem(sizeof *(da), \
                             sizeof((da)->data[0]), (init_size)))
Hence, this would avoid the mistake where the type used in the definition would differ from the type used in the allocation:
dynarray(int) *pInt;
pInt = new_da(char, 1024);
UPDATE FROM DISCUSSION IN COMMENTS:
And what about a single macro to define and initialize ?
// Declare the pointer `da` as a dynarray(type) and initialise it with a
// freshly allocated array of init_size elements, in a single declaration.
#define new_da(da, type, init_size) \
dynarray(type) *da = new_dynarray_mem(sizeof(dynarray(type)), sizeof(type), init_size)
In C, the code
char *c = "Hello world!";
stores Hello world!\0 in rodata and initializes c with a pointer to it.
How can I do this with something other than a string?
Specifically, I am trying to define my own string type
/* Length-prefixed string: a byte count followed by the characters. */
typedef struct {
size_t Length; /* number of characters stored in Data */
char Data[]; /* flexible array member holding the characters */
} PascalString;
And then want some sort of macro so that I can say
const PascalString *c2 = PASCAL_STRING_CONSTANT("Hello world!");
And have it behave the same, in that \x0c\0\0\0Hello world! is stored in rodata and c2 is initialized with a pointer to it.
I tried using
/* Attempt at a compound-literal PascalString. This does not compile:
   a flexible array member cannot be given a non-static initialiser. */
#define PASCAL_STRING_CONSTANT(c_string_constant) \
&((const PascalString) { \
.Length=sizeof(c_string_constant)-1, \
.Data=(c_string_constant), \
})
as suggested in these questions, but it doesn't work because Data is a flexible array: I get the error error: non-static initialization of a flexible array member (with gcc, clang gives a similar error).
Is this possible in C? And if so, what would the PASCAL_STRING_CONSTANT macro look like?
To clarify
With a C string, the following code-block never stores the string on the stack:
#include <inttypes.h>
#include <stdio.h>
/* Baseline: print a plain C string through a pointer to the literal. */
int main(void) {
const char *c = "Hello world!";
printf("test %s", c);
return 0;
}
As we can see by looking at the assembly, line 5 compiles to just loading a pointer into a register.
I want to be able to get that same behavior with pascal strings, and using GNU extensions it is possible to. The following code also never stores the pascal-string on the stack:
#include <inttypes.h>
#include <stdio.h>
/* Length-prefixed string: a byte count followed by the characters. */
typedef struct {
size_t Length; /* number of characters stored in Data */
char Data[]; /* flexible array member holding the characters */
} PascalString;
/* Yields a pointer to a static const PascalString initialised from a
   string literal. Relies on GNU statement expressions ({ ... }) and on
   static initialisation of a flexible array member; neither is ISO C. */
#define PASCAL_STRING_CONSTANT(c_string_constant) ({\
static const PascalString _tmpstr = { \
.Length=sizeof(c_string_constant)-1, \
.Data=c_string_constant, \
}; \
&_tmpstr; \
})
/* Print a PascalString constant using an explicit length. */
int main(void) {
    const PascalString *c2 = PASCAL_STRING_CONSTANT("Hello world!");
    /* The '*' precision of %.*s must be an int; Length is a size_t. */
    printf("test %.*s", (int)c2->Length, c2->Data);
    return 0;
}
Looking at its generated assembly, line 18 is also just loading a pointer.
However, the best code I've found to do this in ANSI C produces code to copy the entire string onto the stack:
#include <inttypes.h>
#include <stdio.h>
/* Length-prefixed string: a byte count followed by the characters. */
typedef struct {
size_t Length; /* number of characters stored in Data */
char Data[]; /* flexible array member holding the characters */
} PascalString;
/* Yields a const PascalString pointer to a compound literal initialised
   from the string literal initial_value (terminating NUL included in the
   storage, excluded from Length).
   The stand-in struct must mirror PascalString member for member: Length
   is a size_t here as well, otherwise Data would sit at a different offset
   than the one PascalString reads from.
   NOTE(review): the literal is still accessed through a different struct
   type; confirm this is acceptable under your compiler's aliasing rules. */
#define PASCAL_STRING_CONSTANT(initial_value) \
    ((const PascalString *)&(const struct { \
        size_t Length; \
        char Data[sizeof(initial_value)]; \
    }){ \
        .Length = sizeof(initial_value)-1, \
        .Data = initial_value, \
    })
/* Print a PascalString constant using an explicit length. */
int main(void) {
    const PascalString *c2 = PASCAL_STRING_CONSTANT("Hello world!");
    /* The '*' precision of %.*s must be an int; Length is a size_t. */
    printf("test %.*s", (int)c2->Length, c2->Data);
    return 0;
}
In the generated assembly for this code, line 19 copies the entire struct onto the stack then produces a pointer to it.
I'm looking for either ANSI C code that produces the same assembly as my second example, or an explanation of why that's not possible with ANSI C.
You can use this macro, which names the name of the variable on its contents:
/* Declares a variable `name` holding a length byte followed by the
   characters of the string literal str (no terminating NUL is stored).
   The length is an unsigned char, so long literals will not fit in it. */
#define PASCAL_STRING(name, str) \
struct { \
unsigned char len; \
char content[sizeof(str) - 1]; \
} name = { sizeof(str) - 1, str }
To create such a string. Use it like this:
const PASCAL_STRING(c2, "Hello world!");
This can be done with the statement-expressions GNU extension, although it is nonstandard.
/* Yields a pointer to a static const PascalString initialised from a
   string literal. Relies on GNU statement expressions ({ ... }) and on
   static initialisation of a flexible array member; neither is ISO C. */
#define PASCAL_STRING_CONSTANT(c_string_constant) ({\
static const PascalString _tmpstr = { \
.Length=sizeof(c_string_constant)-1, \
.Data=c_string_constant, \
}; \
&_tmpstr; \
})
The extension allows you to have multiple statements in a block as an expression which evaluates to the value of the last statement by enclosing the block in ({ ... }). Thus, we can declare our PascalString as a static const value, and then return a pointer to it.
For completeness, we can also make a stack buffer if we want to modify it:
/* Yields a PascalString pointer to a modifiable compound literal with room
   for `capacity` characters, initialised from the string literal
   initial_value.
   The stand-in struct must mirror PascalString member for member: Length
   is a size_t here as well, otherwise Data would sit at a different offset
   than the one PascalString reads from.
   NOTE(review): the literal is still accessed through a different struct
   type; confirm this is acceptable under your compiler's aliasing rules. */
#define PASCAL_STRING_STACKBUF(initial_value, capacity) \
    ((PascalString *)&(struct { \
        size_t Length; \
        char Data[(capacity)]; \
    }){ \
        .Length = sizeof(initial_value)-1, \
        .Data = initial_value, \
    })
I am not sure why you would want to do it, but you could do it this way.
This method will store your string in the data segment and gives you a way to access it as a structure. Note that I create a packed structure to ensure that the mapping into the structure always works since I have essentially hard coded the data fields in the const expression below.
#include <stdio.h>
/* Pack to 1-byte alignment so that Data directly follows Length, matching
   a hand-written byte image laid over this type. The directive is
   "#pragma pack"; "#pragma packed" is not a recognised pragma. */
#pragma pack(1)
typedef struct {
    unsigned char Length; /* number of characters in Data */
    char Data[];          /* the characters */
} PascalString;
#pragma pack()
/* Raw image of a Pascal string: a length byte followed by the characters
   and a terminating NUL so Data can also be printed with %s. */
const unsigned char HELLO[7] = {
0x06,
'H','E','L','L','O','\0'
};
int main(void) {
PascalString * myString = (PascalString *)HELLO;
printf("I say: %s \n", myString->Data);
}
In a previous question I found a way to overload functions in C99 when each function only took a single argument. See the answers in: Function overloading in C using GCC - compiler warnings for details.
Now that I've found a way to do it with single argument functions I'm wondering how this can be done for functions that take multiple arguments. I assume it will have something to do with __VA_ARGS__ and using ... but I can't seem to find anything that works or even wants to compile.
This will work for a print that takes 2 arguments:
/* Two-argument print: selects print_int when both x and y are int, and
   otherwise expands to (void)0 applied to the arguments (a compile error).
   Uses GCC builtins and typeof. */
#define print(x, y) \
__builtin_choose_expr(__builtin_types_compatible_p(typeof(x), int) && \
__builtin_types_compatible_p(typeof(y), int), print_int, \
(void)0)(x, y)
But if I also want another version that takes one argument I can't redefine it. Adding this will give me an error saying print is redefined:
/* One-argument print: selects print_string when x is a char array.
   It cannot coexist with another definition of print under the same name. */
#define print(x) \
__builtin_choose_expr(__builtin_types_compatible_p(typeof(x), char[]), print_string, \
(void)0)(x)
How can I overload print so it will work with 2 integers as an input or with a character array?
example usage:
print(1, 2);
print("this");
Or even better... how can I make it work with any combination of types or any number of arguments?
Also remember, since this is C99 the _Generic keyword is not available.
You can do what you want with GCC's extensions and with an overdose of preprocessor tricks. The commenters have already made their opinion clear: C is rather explicit and has a one-to-one relationship with the symbols produced. If you want function overloading and type inspection, use one of the many languages that provide them.
Baroque macro solutions tend to be toys rather than code that's suitable for production, but it's still an interesting exercise to push the envelope. Safety helmets on, though, and be aware that:
... the solution isn't portable, because the core gimmick of choosing arguments via types is already GCC specific.
... the solution is build on macros. Finding syntax errors in macros is difficult, because the error messages refer to expanded code that the user doesn't see.
... the solution pollutes the namespace with many macro names. If you really want to use this solution, prefix all your macros (except the most visible ones) consistently so as to minimize the danger of symbol collision.
That out of the way, let's implement a function put that writes its arguments to stdout according to their types:
const char *name = "Fred";
double C = 12.5;
put(1, " ", 2); // 1 2
put("Hello, I'm ", name, "!"); // Hello, I'm Fred!
put(C, " Celsius"); // 12.5 Celsius
put(C * 1.8 + 32.0, " Fahrenheit"); // 54.5 Fahrenheit
For the sake of simplicity, the solution accepts only up to three arguments of either int, const char * or double, but the maximum number of arguments is extensible.
The solution consists of these parts:
Variadic constant-type macros
Say you want to have a function that sums all arguments. The number of arguments may vary, but all arguments are of type double. If they are not of type double, they should be promoted to double.
Variadic functions aren't a good solution, because they will pass the arguments to the function per individual type. trying to sum(1, 2, 3) as double will have disastrous results.
Instead, you can use compound literals to create an array of doubles on the fly. Use the sizeof mechanism to get the length of the array. (The arguments may have side effects, because the array inside the sizeof isn't evaluated, only its size is determined.)
/* Sums any number of arguments as doubles. The arguments are placed in a
   double[] compound literal; the element count comes from a second copy of
   the literal inside sizeof, which is not evaluated. */
#define sum(...) sum_impl(sizeof((double[]){__VA_ARGS__})/ \
sizeof(double), (double[]){__VA_ARGS__})
/* Add up the n doubles in x, walking from the last element to the first. */
double sum_impl(size_t n, double x[])
{
    double total = 0.0;
    for (size_t remaining = n; remaining > 0; remaining--) {
        total += x[remaining - 1];
    }
    return total;
}
This will yield 6.0 for sum(1, 2, 3) in a calculation performed on doubles.
Variant type
You want all arguments to be of the same type, but this type should be able to represent all supported types of your function. The C way to create a variant is to use a tagged union, a union inside a struct:
/* Tagged union able to carry any of the supported argument types. */
typedef struct var_t var_t;
struct var_t {
int type; /* tag saying which member of data is valid */
union {
int i;
double f;
const char *s;
} data;
};
The type could be an enumeration. I use character constants that mirror the printf format letters here.
The variant of an expression is determined with a macro VAR, which is essentially the gcc specific you have posted above:
/* CHOOSE and IFTYPE are shorthands for the GCC builtins used for
   compile-time selection by type. */
#define CHOOSE __builtin_choose_expr
#define IFTYPE(X, T) __builtin_types_compatible_p(typeof(X), T)
/* Wrap expression X in a var_t by calling the make_var_* function that
   matches X's type; unsupported types go to make_var_0. */
#define VAR(X) \
CHOOSE(IFTYPE(X, int), make_var_i, \
CHOOSE(IFTYPE(X, const char[]), make_var_s, \
CHOOSE(IFTYPE(X, const char *), make_var_s, \
CHOOSE(IFTYPE(X, double), make_var_f, \
make_var_0))))(X)
The macro invokes any of the make_var functions. These functions must be defined for each valid type:
var_t make_var_i(int X) { var_t v = {'i', {.i = X}}; return v; }
var_t make_var_s(const char *X) { var_t v = {'s', {.s = X}}; return v; }
var_t make_var_f(double X) { var_t v = {'f', {.f = X}}; return v; }
var_t make_var_0() { var_t v = {'#'}; return v; }
Incorporating the X into the type-dependent expression doesn't work, as you have already found out. Neither can you use compound literals with designated initialisers here, probably for the same reasons. (I've said that error checking with macros is hard, haven't I?)
This is the only GCC specific part; it could also be achieved with C11's _Generic.
Applying the macro to all arguments of a function
You must apply the VAR macro to all arguments of your variadic put macro. You cannot process the head of the variadic arguments until you get an empty list, because you cannot expand macros recursively, but you can use a trick that counts the arguments to the macro and then expand to a macro that takes that many arguments:
/* One fixed-arity wrapper per supported argument count; each converts its
   arguments to var_t and passes them to put_impl with the count. */
#define PUT1(_1) put_impl(1, (var_t[]){VAR(_1)})
#define PUT2(_1, _2) put_impl(2, (var_t[]){VAR(_1), VAR(_2)})
#define PUT3(_1, _2, _3) put_impl(3, (var_t[]){VAR(_1), VAR(_2), VAR(_3)})
/* SELECT_N expands to its fourth argument. put() appends PUT3, PUT2, PUT1
   after the user's arguments, so the fourth slot names the wrapper that
   matches how many arguments were given. */
#define SELECT_N(_1, _2, _3, N, ...) N
#define put(...) SELECT_N(__VA_ARGS__, PUT3, PUT2, PUT1)(__VA_ARGS__)
Now put takes 1, 2 or 3 arguments. If you provide more than 3, you get an obscure error message that says nothing about having passed too many arguments.
The code above will not accept an empty argument list. With the GCC extension `, ##__VA_ARGS__`, which writes the comma only if the variadic argument list isn't empty, you can extend this to:
/* One fixed-arity wrapper per supported argument count (now including
   zero); each passes its converted arguments and the count to put_impl. */
#define PUT0() put_impl(0, NULL)
#define PUT1(_1) put_impl(1, (var_t[]){VAR(_1)})
#define PUT2(_1, _2) put_impl(2, (var_t[]){VAR(_1), VAR(_2)})
#define PUT3(_1, _2, _3) put_impl(3, (var_t[]){VAR(_1), VAR(_2), VAR(_3)})
/* SELECT_N expands to its fifth argument. The leading X placeholder keeps
   the list non-empty, and ", ##__VA_ARGS__" (GNU extension) drops the comma
   when put() is called without arguments. */
#define SELECT_N(X, _1, _2, _3, N, ...) N
#define put(...) SELECT_N(X, ##__VA_ARGS__, PUT3, PUT2, PUT1,PUT0)(__VA_ARGS__)
You can extend this solution to arbitrarily many arguments if you like.
The implementation
The above macro invokes the function put_impl, which implements the printing of an array of n variants. After all the tricks above, the function is rather straightforward:
/* Print the n variants in var one after another, followed by a newline.
   Each variant is printed according to its type tag; a variant with an
   unknown tag prints nothing. */
void put_impl(size_t n, const var_t var[])
{
    size_t i = 0;
    while (i < n) {
        const var_t *v = &var[i++];
        if (v->type == 'i') {
            printf("%i", v->data.i);
        } else if (v->type == 'f') {
            printf("%g", v->data.f);
        } else if (v->type == 's') {
            printf("%s", v->data.s);
        } else if (v->type == '#') {
            printf("[undef]");
        }
    }
    putchar('\n');
}
Putting it all together
The following program uses the method described above to print some rather silly stuff. It is not portable, but runs if compiled with gcc -std=gnu99:
#include <stdlib.h>
#include <stdio.h>
/* CHOOSE and IFTYPE are shorthands for the GCC builtins used for
   compile-time selection by type. */
#define CHOOSE __builtin_choose_expr
#define IFTYPE(X, T) __builtin_types_compatible_p(typeof(X), T)
/* Wrap expression X in a var_t by calling the make_var_* function that
   matches X's type; unsupported types go to make_var_0. */
#define VAR(X) \
CHOOSE(IFTYPE(X, int), make_var_i, \
CHOOSE(IFTYPE(X, const char[]), make_var_s, \
CHOOSE(IFTYPE(X, const char *), make_var_s, \
CHOOSE(IFTYPE(X, double), make_var_f, \
make_var_0))))(X)
/* One fixed-arity wrapper per supported argument count. */
#define PUT0() put_impl(0, NULL)
#define PUT1(_1) put_impl(1, (var_t[]){VAR(_1)})
#define PUT2(_1, _2) put_impl(2, (var_t[]){VAR(_1), VAR(_2)})
#define PUT3(_1, _2, _3) put_impl(3, (var_t[]){VAR(_1), VAR(_2), VAR(_3)})
/* SELECT_N expands to its fifth argument, which put() arranges to be the
   wrapper matching the number of arguments given. */
#define SELECT_N(X, _1, _2, _3, N, ...) N
#define put(...) SELECT_N(X, ##__VA_ARGS__, PUT3, PUT2, PUT1,PUT0)(__VA_ARGS__)
/* Tagged union able to carry any of the supported argument types. */
typedef struct var_t var_t;

struct var_t {
    int type;           /* tag saying which member of data is valid */
    union {
        int i;
        double f;
        const char *s;
    } data;
};

/* Constructors for var_t, one per supported type. Each stores the tag
   character and the matching union member. */
var_t make_var_i(int X)
{
    var_t v;
    v.type = 'i';
    v.data.i = X;
    return v;
}

var_t make_var_s(const char *X)
{
    var_t v;
    v.type = 's';
    v.data.s = X;
    return v;
}

var_t make_var_f(double X)
{
    var_t v;
    v.type = 'f';
    v.data.f = X;
    return v;
}

/* Fallback for unsupported types. Declared without a parameter list so it
   can still be called with one argument of any type. */
var_t make_var_0()
{
    var_t v = {0};
    v.type = '#';
    return v;
}
/* Print the n variants in var one after another, followed by a newline.
   Each variant is printed according to its type tag; a variant with an
   unknown tag prints nothing. */
void put_impl(size_t n, const var_t var[])
{
    size_t i = 0;
    while (i < n) {
        const var_t *v = &var[i++];
        if (v->type == 'i') {
            printf("%i", v->data.i);
        } else if (v->type == 'f') {
            printf("%g", v->data.f);
        } else if (v->type == 's') {
            printf("%s", v->data.s);
        } else if (v->type == '#') {
            printf("[undef]");
        }
    }
    putchar('\n');
}
/* Exercise put() with zero to three arguments of mixed types. */
int main(void)
{
    const char *who = "Fred";
    double celsius = 12.5;

    put(1, " ", 2);
    put("Hello, I'm ", who, "!");
    put();
    put(celsius, " Celsius");
    put(celsius * 1.8 + 32.0, " Fahrenheit");

    return 0;
}
You can go crazy on the types and number of arguments you want to support, but keep in mind that the bigger your jungle of macros gets, the harder it will be to maintain and to debug.
This solution is in no way generic, but it will get the job done for the very specific case asked in the question.
#include <stdio.h>
/* Dispatch on the type of the first argument only: an int first argument
   selects print_int, anything else selects print_string. All arguments are
   then forwarded to the selected function. Uses GCC builtins and typeof. */
#define print(...) \
__builtin_choose_expr(__builtin_types_compatible_p(typeof(FIRST(__VA_ARGS__)), int), print_int, print_string)\
(__VA_ARGS__)
/* Expands to the first of its arguments. */
#define FIRST(A, ...) A
/* Print two integers on one line, prefixed with "int: ". */
void print_int(int i, int j)
{
    printf("int: %d %d\n", i, j);
}
/* Print a string on one line, prefixed with "char*: ".
   The string is only read, and callers pass string literals, so the
   parameter is a pointer to const. */
void print_string(const char *s)
{
    printf("char*: %s\n", s);
}
/* Call print() once with two ints and once with a string literal. */
int main(int argc, char* argv[])
{
    print(1, 2);
    print("this");

    return 0;
}
If anyone can find a more generalized solution that will work consistently when new overloads are added that would be greatly appreciated.
After some work on the generic vector I asked about in this question, I would like to know if there is any way of checking that each instantiation of the library is only done once per type.
Here is what the current header file looks like:
/* Name-mangling helpers, defined once no matter how often this header is
   included. Each vector_xxx(T) macro expands to the identifier of the
   T-specific type or function. */
#ifndef VECTOR_GENERIC_MACROS
#define VECTOR_GENERIC_MACROS
#ifndef TOKENPASTE
#define TOKENPASTE(a, b) a ## b
#endif
#define vector_t(T) TOKENPASTE(vector_t_, T)
/* Note the leading '*': vector_at(T) expands to *vector_at_T. */
#define vector_at(T) TOKENPASTE(*vector_at_, T)
#define vector_init(T) TOKENPASTE(vector_init_, T)
#define vector_destroy(T) TOKENPASTE(vector_destroy_, T)
#define vector_new(T) TOKENPASTE(vector_new_, T)
#define vector_delete(T) TOKENPASTE(vector_delete_, T)
#define vector_push_back(T) TOKENPASTE(vector_push_back_, T)
#define vector_pop_back(T) TOKENPASTE(vector_pop_back_, T)
#define vector_resize(T) TOKENPASTE(vector_resize_, T)
#define vector_reserve(T) TOKENPASTE(vector_reserve_, T)
#endif
/* Per-type section: everything below is emitted again on every inclusion,
   using whatever TYPE is defined at that point. */
typedef struct {
size_t size;
size_t capacity;
TYPE *data;
} vector_t(TYPE);
/* Because vector_at(TYPE) expands with a leading '*', this declares a
   function returning TYPE *, and a call through the macro yields an lvalue. */
inline TYPE vector_at(TYPE)(vector_t(TYPE) *vector, size_t pos);
void vector_init(TYPE)(vector_t(TYPE) *vector, size_t size);
void vector_destroy(TYPE)(vector_t(TYPE) *vector);
/* NOTE(review): returns TYPE * although vector_delete takes a
   vector_t(TYPE) *; confirm the intended return type against vector.def. */
inline TYPE *vector_new(TYPE)(size_t size);
inline void vector_delete(TYPE)(vector_t(TYPE) *vector);
void vector_push_back(TYPE)(vector_t(TYPE) *vector, TYPE value);
inline TYPE vector_pop_back(TYPE)(vector_t(TYPE) *vector);
inline void vector_resize(TYPE)(vector_t(TYPE) *vector, size_t size);
void vector_reserve(TYPE)(vector_t(TYPE) *vector, size_t size);
The header can then be included along with the source definitions:
#include <stdio.h>
#define TYPE int
#include "vector.h"
#include "vector.def"
#undef TYPE
/* Fill an int vector with 0..9, then print each element after
   incrementing it in place. */
int main(void)
{
    vector_t(int) myVectorInt;
    vector_init(int)(&myVectorInt, 0);

    for (int i = 0; i < 10; ++i)
        vector_push_back(int)(&myVectorInt, i);

    /* size is a size_t, so the index is a size_t too; this avoids
       comparing a signed int against an unsigned count. */
    for (size_t i = 0; i < myVectorInt.size; ++i)
        printf("%d ", ++vector_at(int)(&myVectorInt, i));

    vector_destroy(int)(&myVectorInt);
    return 0;
}
I would like to make sure that the content below that last endif is only included once per TYPE.
Obviously, #ifdef VECTOR_INSTANCE(TYPE) does not work, so I'm really out of ideas...
It's a tough question; I was also interested in the matter when I asked a question similar to yours some time ago.
My conclusion is that if you are going to use vectors (or, using more accurate naming, dynamic arrays) of many different types then it's wasteful to have all those functions vector_##TYPE##_reserve(), vector_##TYPE##_resize(), etc... multiple times.
Instead, it is more efficient and clean to have those functions defined only once in a separate .c file, using your type's size as an extra argument. Those functions prototyped in a separate .h file. Then the same .h file would provide macros that generate functions wrappers for your own types, so that you don't see it using the size as an extra argument.
For example, your vector.h header would contain the following :
/* Declare functions operating on a generic vector type */
void vector_generic_resize(void *vector, size_t size, size_t data_size);
void vector_generic_push_back(void *vector, void *value, size_t data_size);
void *vector_generic_pop_back(void *vector, size_t data_size);
void vector_generic_init(void *vector, size_t size, size_t data_size);
void vector_generic_destroy(void *vector) ; // I don't think data_size is needed here
/* Taken from the example in the question */
/* Defines the struct type vector_<type>_t for elements of `type`.
   `type` must be a single identifier (use a typedef for multi-word types).
   The expansion already ends in ';'. The last line no longer ends in a
   backslash, which used to splice the following source line into the macro. */
#define VECTOR_DEFINITION(type)\
typedef struct {\
    size_t size;\
    size_t capacity;\
    type *data;\
} vector_ ## type ## _t;
/* Declare wrapper macros to make the above functions usable */
/* Every wrapper takes a POINTER to the vector struct (e.g. &my_vector).
   The element size is therefore taken from (vector)->data[0]; the earlier
   vector.data[0] form turned "&v" into sizeof(&v.data[0]), i.e. the size of
   a pointer instead of the size of an element. */
/* First the easy ones */
#define vector_resize(vector, size) vector_generic_resize((vector), (size), sizeof((vector)->data[0]))
#define vector_init(vector, size) vector_generic_init((vector), (size), sizeof((vector)->data[0]))
/* Type has to be given as an argument for the cast operator */
#define vector_pop_back(vector, type) (*(type*)(vector_generic_pop_back((vector), sizeof((vector)->data[0]))))
/* This one is tricky: if 'value' is a constant, its address cannot be taken,
   so it is first copied into a temporary of the element type.
   Wrapped in do/while(0) so the macro behaves as one statement. */
#define vector_push_const(vector, type, value) \
    do { \
        type vector_push_tmp_ = (value); \
        vector_generic_push_back((vector), &vector_push_tmp_, sizeof((vector)->data[0])); \
    } while (0)
/* Equivalent macro, but for pushing variables instead of constants.
   'value' must be an lvalue of the element type. */
#define vector_push_var(vector, value) vector_generic_push_back((vector), &(value), sizeof((vector)->data[0]))
/* Super-macro redirecting to the constant or variable version of push_back
   depending on the number of arguments (3 -> const, 2 -> var). The trailing
   0 keeps the variadic part of GET_MACRO non-empty in the 2-argument case. */
#define GET_MACRO(_1,_2,_3,NAME,...) NAME
#define vector_push_back(...) GET_MACRO(__VA_ARGS__, vector_push_const, vector_push_var, 0)(__VA_ARGS__)
/* This macro isn't really needed, but just for homogeneity */
#define vector_destroy(vector) vector_generic_destroy(vector)
/* Original (misspelled) name kept so existing users keep compiling */
#define vector_descroy(vector) vector_destroy(vector)
The functions can then be used as you said in the example you linked, with the significant exception of vector_generic_push_back where unfortunately the type has to be specified each time as an extra macro argument.
So with this solution
You only have to do VECTOR_DEFINITION() within the .c file, avoiding the risk of declaring it with the same type twice
The vector library is only existing once in the binary
The macros can be used elegantly without using the type in their names, except for the pop back macro and the push literal macro.
If this is a problem you could make the push literal always use long long; it will work but potentially lose efficiency.
Similarly you could make the pop_back() macro and the vector_generic_pop_back() function return nothing, as pop_back() does in C++, so that if you apply both of those tricks you never need to use the type name explicitly in the macros.
As a reference, the main function you posted in the example that is linked in your question has to be adapted like that :
#include <stdio.h>
#include <stdlib.h>
#include "vector.h"
typedef unsigned int uint;
typedef char* str;
VECTOR_DEFINITION(uint)
VECTOR_DEFINITION(str)
/* Demo of the generic vector wrappers with an unsigned-int vector and a
   string vector. */
int main(void)
{
    vector_uint_t vector;
    vector_init(&vector, 10);

    for (unsigned int i = 0; i < vector.size; ++i)
        vector.data[i] = i;
    for (unsigned int i = 0; i < 10; ++i)
        vector_push_back(&vector, i);

    /* When pushing back a constant, we *have* to specify the type */
    /* It is OK to use C keywords as they are suppressed by the preprocessor */
    vector_push_back(&vector, unsigned int, 12);

    /* The elements are unsigned, so print them with %u. */
    for (unsigned int i = 0; i < vector.size; ++i)
        printf("%u ", vector.data[i]);
    printf("\n");
    vector_destroy(&vector);

    vector_str_t sentence;
    vector_init(&sentence, 0);

    /* String literals go through the typed form: the two-argument form
       takes the address of its argument, and the address of a literal is
       the address of its characters, not of a char* element.
       NOTE(review): assumes vector_generic_push_back copies data_size bytes
       from the address it is given; confirm against its definition. */
    vector_push_back(&sentence, str, "Hello");
    vector_push_back(&sentence, str, "World!");
    vector_push_back(&sentence, str, "How");
    vector_push_back(&sentence, str, "are");
    vector_push_back(&sentence, str, "you?");

    for (unsigned int i = 0; i < sentence.size; ++i)
        printf("%s ", sentence.data[i]);
    printf("\n");
    vector_destroy(&sentence);

    return 0;
}
suggest:
remove the prototypes from the vector.h file.
place the prototypes at the top of the vector.def file.
remove the typedef struct from the vector.h file
place the typedef struct before the prototypes in the vector.def file.
then multiples #include statements for the vector.h file will have no bad effects.
Then use the following, in each source file that is to use these vector types:
#include<vector.h>
#define TYPE int
#include<vector.def>
#undef TYPE
#define TYPE char
#include<vector.def>
#undef TYPE
... etc
BTW:
There is no library involved, so I'm a bit confused by the reference
to 'library' in the question
It may be worthwhile to also prefix the 'static' modifier
to each of the function definitions so the definitions are
not visible across source files
It may be worthwhile to use parens around the parameters to TOKENPASTE
so modifiers like 'static' and.or 'const'
can be prefixed to the function names.
I am getting back into using C, but I've been spoiled by generics in other languages. I have made it to the following piece of code in my implementation of a resizable array:
/* Resizable array of untyped entries. (Keyword typo "typdef" fixed.) */
typedef struct {
    void** array;     /* the stored entries */
    int length;
    int capacity;
    size_t type_size; /* size in bytes of one entry's type */
} Vector;
/* Add `entry` to the vector `v`. The body is elided in the question; only
   the signature (entry passed as a void pointer) matters here. */
void vector_add(Vector* v, void* entry) {
// ... code for adding to the array and resizing
}
/* Illustrates the problem being asked about: vector_add() expects a
   void pointer, so an int value cannot be passed to it directly. */
int main() {
Vector* vector = vector_create(5, sizeof(int));
vector_add(vector, 4); // This is erroneous...
// ...
}
In my attempt to make this generic, I'm now unable to add an integer to the vector without storing it in memory somewhere else.
Is there any way to make this work (either as is, or possibly a better approach to generics)?
For my answer I am assuming that you are not familiar with the sections of memory (ie the use of the memory pool).
In my attempt to make this generic, I'm now unable to add an integer to the vector without storing it in memory somewhere else.
If you want to create a generic structure (as you did) then you will need to use void pointers. Consequently, from the use of void pointers you will need to store the values for each field on the memory pool, or uncommonly on the stack. Note, the structure is composed of void pointers and hence only memory addresses are contained within the structure, pointing to other locations in memory where the values are.
Be careful if you declare them on the stack as once your stack frame is popped from the call stack those memory addresses are not considered to be valid and hence may be used by another stack frame (overwriting your existing values within that collection of memory addresses).
Aside: If you migrate to C++ then you can consider the use of C++ templates.
Yes; you can embrace Greenspun's Tenth Rule and develop a full blown dynamic language in C, and in the process, develop a relatively clean C run time that can be used from within C.
In this project I did just that, as have others before me.
In the C run time of this project, a generic number would be created from a C number like this:
val n = num(42);
because of the way val is represented, it takes up only a machine word. A few bits of type tag are used to distinguish a number from a pointer, from a character, etc.
There is also this:
val n = num_fast(42);
which is much faster (a bit manipulation macro) because it doesn't do any special checks that the number 42 fits into the "fixnum" range; it's used for small integers.
A function that adds its argument to every element of a vector could be written (very inefficiently) like this:
/*
 * Adds `delta` in place to every element of the vector `vec`.
 * Walks the indices with the generic arithmetic helpers and updates each
 * element through the location returned by vecref_l. Always returns nil.
 */
val vector_add(val vec, val delta)
{
    val idx = zero;

    while (lt(idx, length(vec))) {
        val *slot = vecref_l(vec, idx);

        *slot = plus(*slot, delta);
        idx = plus(idx, one);
    }

    return nil;
}
Since plus is generic, this will work with fixnums, bignums and reals, as well as with characters, since it is possible to add integer displacements to characters via plus.
Type mismatch errors will be caught by the lower level functions and turned into exceptions. For instance if vec isn't something to which length can be applied, length will throw.
Functions with a _l suffix return a location. Whereas vecref(v, i) returns the value at offset i in vector v, vecref_l(v, i) returns a pointer to the val typed location in the vector which stores that value.
It's all C, just with the ISO C rules bent a little bit: you can't make a type like val efficiently in strictly conforming C, but you can do it quite portably to architectures and compilers you care about supporting.
Our vector_add isn't generic enough. It's possible to do better:
/*
 * Adds `delta` in place to every element of the sequence `vec`.
 * Elements are read with ref and written back with refset instead of going
 * through a vector-specific location. Always returns nil.
 */
val sequence_add(val vec, val delta)
{
    val pos = zero;

    while (lt(pos, length(vec))) {
        refset(vec, pos, plus(ref(vec, pos), delta));
        pos = plus(pos, one);
    }

    return nil;
}
By using the generic ref and refset, this now works with lists and strings also, not only vectors. We can do something like:
val str = string(L"abcd");
sequence_add(str, num(2));
The contents of str will change to cdef since a displacement of 2 is added to each character, in place.
Your idea can be done:
int *new_int = (int*)malloc(sizeof(int));
*new_int = 4;
vector_add(vector, new_int);
Naturally, it would be a good idea to do a int *create_int(int x) function or something similar:
/*
 * Allocates an int on the heap and initialises it to `x`.
 *
 * Returns the new object, or NULL if the allocation fails. The caller owns
 * the returned memory and must release it with free().
 */
int *create_int(int x)
{
    int *n = malloc(sizeof *n);

    if (n != NULL) {
        *n = x;   /* store the argument, not a hard-coded constant */
    }
    return n;
}
//...
vector_add(vector, create_int(4));
If your environment allows it you may consider using a well tested, widely used library that already manages all that, such as Glib. Or even C++.
You can avoid having many many small allocations by storing the data instead of pointers to it, like
/* Vector that keeps the element bytes themselves in one contiguous buffer. */
typedef struct {
    char   *array;      /* raw storage, addressed in type_size-byte steps */
    int     length;     /* elements currently stored */
    int     capacity;   /* elements that fit before the storage must grow */
    size_t  type_size;  /* size of one element in bytes */
} Vector;
/*
 * Copies one element (type_size bytes read from `entry`) onto the end of `v`.
 *
 * vector_expand is only called when the vector is already full. Returns 1
 * when the element was stored and 0 when it did not fit.
 */
bool vector_add(Vector* v, void* entry)
{
    /* Full and could not be expanded: nothing is copied. */
    if (v->length >= v->capacity && !vector_expand(v)) {
        return 0; // didn't fit
    }

    size_t offset = (size_t)v->length * v->type_size;

    v->length++;
    memcpy(v->array + offset, entry, v->type_size);
    return 1;
}
/*
 * Usage example: the value lives in a local variable and vector_add copies
 * its bytes into the vector's own storage.
 */
int main()
{
Vector* vector = vector_create(5, sizeof(int));
int value = 4;
vector_add(vector, &value); // pointer to local is ok because the pointer isn't stored, only used for memcpy
}
Yes, here's an implementation of mine (similar to yours) that may help. It uses macros that can be wrapped with function calls for immediate values.
#ifndef VECTOR_H
# define VECTOR_H
# include <stddef.h>
# include <stdlib.h>
# include <string.h>
# define VECTOR_HEADROOM 4
/* A simple library for dynamic
 * string/array manipulation
 *
 * Written by: Taylor Holberton
 * During: July 2013
 */

/* Growable buffer; the element type is supplied at each macro call site. */
struct vector {
    void * data;        /* element storage, allocated on the first insertion */
    size_t size, len;   /* size: slots accounted for; len: slots in use */
    size_t headroom;    /* slots added or released per resize step */
};

int vector_init (struct vector *);
size_t vector_addc (struct vector *, int index, char c);
size_t vector_subc (struct vector *, int index);
// these ones are just for strings (I haven't yet generalized them)
size_t vector_adds (struct vector *, int index, int iend, const char * c);
size_t vector_subs (struct vector *, int ibegin, int iend);
size_t vector_addi (struct vector *, int index, int i);
size_t vector_subi (struct vector *, int index);

/*
 * vector_addm (v, index, datatype, element)
 *
 * Inserts `element` (of type `datatype`) at position `index` of `*v`,
 * shifting later elements up by one. A negative or too-large `index`
 * appends. Newly allocated slots are zero-filled and the slot just past the
 * last element is carried along, so a zero terminator is preserved.
 *
 * The macro contains `return 0;`, so it must be expanded inside a function
 * whose return type accepts 0. It returns 0 from that function when `v` is
 * NULL or when memory cannot be obtained; in the latter case the vector is
 * left unchanged.
 *
 * `v`, `index` and `element` are each evaluated exactly once; `index` is
 * never written to. A zero `headroom` is replaced by VECTOR_HEADROOM.
 */
# define vector_addm(v, index, datatype, element)                              \
do {                                                                            \
    struct vector * vm_v_ = (v);                                                \
    int vm_idx_ = (index);                                                      \
    datatype * vm_p_;                                                           \
    size_t vm_at_;                                                              \
                                                                                \
    if (!vm_v_) return 0;                                                       \
                                                                                \
    /* a zero step would never allocate anything */                             \
    if (!vm_v_->headroom){                                                      \
        vm_v_->headroom = VECTOR_HEADROOM;                                      \
    }                                                                           \
                                                                                \
    if (!vm_v_->size){                                                          \
        vm_p_ = calloc (vm_v_->headroom, sizeof (datatype));                    \
        if (!vm_p_) return 0;                                                   \
        vm_v_->data = vm_p_;                                                    \
        vm_v_->size = vm_v_->headroom;                                          \
    }                                                                           \
                                                                                \
    vm_p_ = vm_v_->data;                                                        \
                                                                                \
    /* written as len + 2 so that a size below 2 cannot wrap around */          \
    if (vm_v_->len + 2 >= vm_v_->size){                                         \
        /* keep the old block reachable until realloc has succeeded */          \
        vm_p_ = realloc (vm_v_->data,                                           \
            (vm_v_->size + vm_v_->headroom) * sizeof (datatype));               \
        if (!vm_p_) return 0;                                                   \
        vm_v_->data = vm_p_;                                                    \
        memset (&vm_p_[vm_v_->size], 0, vm_v_->headroom * sizeof (datatype));   \
        vm_v_->size += vm_v_->headroom;                                         \
    }                                                                           \
                                                                                \
    vm_at_ = (vm_idx_ < 0 || (size_t) vm_idx_ > vm_v_->len)                     \
        ? vm_v_->len : (size_t) vm_idx_;                                        \
                                                                                \
    /* moves slots [at, len] up by one, including the spare slot at len */      \
    memmove (&vm_p_[vm_at_ + 1], &vm_p_[vm_at_],                                \
        (vm_v_->len - vm_at_ + 1) * sizeof (datatype));                         \
                                                                                \
    vm_p_[vm_at_] = (element);                                                  \
                                                                                \
    vm_v_->len++;                                                               \
                                                                                \
} while (0)

/*
 * vector_subm (v, index, datatype)
 *
 * Removes the element at position `index` of `*v`, shifting later elements
 * down by one. A negative or too-large `index` removes the last element.
 * Storage is given back in `headroom`-sized steps; if that shrinking
 * realloc fails the larger block is simply kept.
 *
 * The macro contains `return 0;`, so it must be expanded inside a function
 * whose return type accepts 0. It returns 0 from that function when `v` is
 * NULL or the vector is empty.
 *
 * `v` and `index` are each evaluated exactly once; `index` is never
 * written to.
 */
# define vector_subm(v, index, datatype)                                       \
do {                                                                            \
    struct vector * vm_v_ = (v);                                                \
    int vm_idx_ = (index);                                                      \
    datatype * vm_p_;                                                           \
    size_t vm_at_;                                                              \
                                                                                \
    if (!vm_v_ || !vm_v_->len){                                                 \
        return 0;                                                               \
    }                                                                           \
                                                                                \
    vm_at_ = (vm_idx_ < 0 || (size_t) vm_idx_ > (vm_v_->len - 1))               \
        ? (vm_v_->len - 1) : (size_t) vm_idx_;                                  \
                                                                                \
    vm_p_ = vm_v_->data;                                                        \
                                                                                \
    /* moves slots [at + 1, len] down by one, including the spare slot */       \
    memmove (&vm_p_[vm_at_], &vm_p_[vm_at_ + 1],                                \
        (vm_v_->len - vm_at_) * sizeof (datatype));                             \
                                                                                \
    vm_v_->len--;                                                               \
                                                                                \
    if ((vm_v_->size - vm_v_->len) > vm_v_->headroom){                          \
        vm_p_ = realloc (vm_v_->data,                                           \
            ((vm_v_->size - vm_v_->headroom) + 1) * sizeof (datatype));         \
        if (vm_p_){                                                             \
            vm_v_->data = vm_p_;                                                \
            vm_v_->size -= vm_v_->headroom;                                     \
        }                                                                       \
    }                                                                           \
                                                                                \
} while (0)
#endif
And I usually wrap them like:
/*
 * Function wrapper around vector_addm for int elements: inserts `i` at
 * `index` and returns the resulting element count (v->len).
 * The macro itself returns 0 from this function when `v` is NULL.
 */
size_t vector_addi (struct vector * v, int index, int i){
vector_addm (v, index, int, i);
return v->len;
}
I haven't had this code-reviewed, but I've been using it in a large program I'm writing and I haven't had any memory errors from them (using valgrind).
The only thing that is really missing (I've been meaning to add it) is the ability to add and subtract arrays from arrays.
Edit: I believe you can also do this same sort of thing with stdarg.h, but I've never tried it.
You asked for a better approach? Here it is: https://github.com/m-e-leypold/glitzersachen-demos/tree/master/generix/v0-2011 (Disclosure: This is my code).
Let me explain very shortly:
I wanted type safe generic containers (which in other languages would be provided by proper generics (Ada) or parametric polymorphism (OCaml)). This is the feature that is most missing in C.
Macros just cannot do it. (I'm not
going to explain that in detail. Suffice it to say: the result of a template expansion or
generic instantiation should be a module in its own right. In C this means that
preprocessor symbols are exported, or can be used for module configuration (like
-DUSE_PROCESS_QUEUE_DEBUGCODE); you couldn't do that if you used C macros to generate
instances.)
I'm abstracting over element type by moving element size and all relevant operation into a descriptive structure. This will be passed to every invocation of the generic code. Note that the descriptor describes the element type, so a descriptor instance will be needed once per generic instance.
I'm using a template processor to create a thin type safe frontend module to the generic code.
Example:
This is the prototype for the generic code to retrieve an element:
void fifo_get ( fifo_DESCRIPTOR* inst, fifo* , void* var );
This is the descriptor type:
/*
 * Per-instance description of the element type, handed to every call of the
 * generic fifo code.
 */
typedef struct fifo_DESCRIPTOR fifo_DESCRIPTOR;

struct fifo_DESCRIPTOR {
    size_t maxindex;      /* NOTE(review): presumably the highest valid slot index - confirm */
    size_t element_size;  /* size of one element in bytes */
};
This is the template code in the type safe wrapper template:
<<eT>> <<>>get ( <<T>>* f ) {
<<eT>> e; fifo_get( &DESCRIPTOR, (fifo*) f, (void*) &e ); return e;
}
And this is what the template expander (instantiating a generic) produces from the template:
/*
 * Type-safe front end produced from the template for element type float:
 * calls the generic fifo_get with this instance's DESCRIPTOR, has it fill a
 * local float, and returns that value.
 */
float floatq_get ( floatq* f ) {
float e; fifo_get( &DESCRIPTOR, (fifo*) f, (void*) &e ); return e;
}
All this has a nice make integration, but hardly any type safety in instantiation. Every error only crops up when compiling with cc.
I cannot justify at the moment, why to stick with source text templates in C instead of migrating to C++. For me, it was just an experiment.
Regards.
This approach will probably horrify you, but it can be made to work if you don't need any type-specialized logic:
// vector.h
#ifndef VECTOR_H
#define VECTOR_H

#include <limits.h>
#include <stdlib.h>

/*
 * VECTOR_IMP(itemType) "instantiates" a vector for one element type. It
 * defines the struct itemType_Vector and the function itemType_vector_add.
 *
 * itemType must be a single identifier (use a typedef for names such as
 * `unsigned int`), because it is pasted into the generated names.
 *
 * Only block comments are used inside the macro: a line comment followed by
 * a backslash continuation would swallow the rest of the definition.
 *
 * A zero-initialised itemType_Vector is a valid empty vector. The owner
 * releases the storage with free(vec.array).
 */
#define VECTOR_IMP(itemType) \
typedef struct { \
    itemType * array;   /* heap storage; NULL while nothing is allocated */ \
    int length;         /* elements in use */ \
    int capacity;       /* elements allocated */ \
} itemType##_Vector; \
\
/* Appends item, doubling the storage when full. The function returns */ \
/* nothing, so on allocation failure the vector is left unchanged.     */ \
static inline void itemType##_vector_add(itemType##_Vector* v, itemType item) { \
    if (v->length >= v->capacity) { \
        itemType * bigger_; \
        int grown_; \
        if (v->capacity > INT_MAX / 2) { \
            return; \
        } \
        grown_ = (v->capacity > 0) ? v->capacity * 2 : 4; \
        if ((size_t) grown_ > (size_t) -1 / sizeof *bigger_) { \
            return; \
        } \
        bigger_ = realloc(v->array, (size_t) grown_ * sizeof *bigger_); \
        if (!bigger_) { \
            return; \
        } \
        v->array = bigger_; \
        v->capacity = grown_; \
    } \
    v->array[v->length++] = item; \
} \
\
/* [... other static-inline generic vector methods would go here ...] */ \
\
/* Redundant prototype, left unterminated so that the semicolon written */ \
/* after VECTOR_IMP(T) completes a declaration.                         */ \
static inline void itemType##_vector_add(itemType##_Vector* v, itemType item)

// Now we can "instantiate" versions of the Vector struct and methods for
// whatever types we want to use.
VECTOR_IMP(int);
VECTOR_IMP(float);
VECTOR_IMP(char);
#endif
... and some example calling code:
#include "vector.h"
/*
 * Example caller: one zero-initialised vector per instantiated element type,
 * each receiving a single element through its type-specific add function.
 */
int main(int argc, char ** argv)
{
float_Vector fv = {0};
int_Vector iv = {0};
char_Vector cv = {0};
int_vector_add(&iv, 5);
float_vector_add(&fv, 3.14f);
char_vector_add(&cv, 'A');
return 0;
}
Instead of having the vector class store the added object, you could just return a pointer to the location where the caller can store it:
/* Vector that stores elements inline as raw bytes. */
typedef struct {
    char *buffer;      /* element storage, capacity * type_size bytes */
    size_t length;     /* elements handed out so far */
    size_t capacity;   /* elements the buffer can hold */
    size_t type_size;  /* size of one element in bytes */
} Vector;

/*
 * Reserves the next element slot of `v` and returns its address, so the
 * caller can store the element there directly:
 *
 *     int *slot = vector_add(v);
 *     if (slot) *slot = 4;
 *
 * The buffer is doubled when full (starting at 4 elements). Returns NULL,
 * leaving the vector unchanged, when type_size is 0, when the new size
 * would overflow, or when memory cannot be obtained.
 *
 * A returned pointer is only valid until the next call that grows the
 * vector, because realloc may move the buffer.
 */
void *vector_add(Vector* v)
{
    if (v->type_size == 0) {
        return NULL;
    }

    if (v->length == v->capacity) {
        size_t grown = v->capacity ? v->capacity * 2 : 4;
        char *bigger;

        /* reject a wrapped element count or a wrapped byte count */
        if (grown <= v->capacity || grown > (size_t)-1 / v->type_size) {
            return NULL;
        }

        bigger = realloc(v->buffer, grown * v->type_size);
        if (bigger == NULL) {
            return NULL;   /* the old buffer is still owned by v */
        }
        v->buffer = bigger;
        v->capacity = grown;
    }

    return v->buffer + v->type_size * v->length++;
}
// in main:
*(int*)vector_add(v) = 4;
Using some non-standard GNU C extensions, it is possible to define generic functions with inferred parameter types. This macro defines a nested function in a statement expression and infers the parameter type using typeof:
#include <stdio.h>
/*
 * fib(n1): Fibonacci number computed in the type of the argument.
 *
 * GNU C only: relies on a statement expression and a nested function.
 * __typeof__ is used rather than typeof so the macro also works when the
 * compiler runs in an ISO mode (e.g. -std=c11), where `typeof` is not a
 * keyword. The argument is evaluated once.
 */
#define fib(n1) ({\
    __typeof__(n1) func(__typeof__(n1) n){\
        if (n <= 1)\
            return n;\
        return func(n-1) + func(n-2);\
    }\
    func(n1);\
})
/* Calls fib once with an int argument and once with a double argument. */
int main()
{
printf("%d\n",fib(3));
printf("%f\n",fib(3.0));
return 0;
}