Dynamic allocation of a union of pointers - C - c

I am writing a basic library for low level image processing in C. I am aware that other (very good) libraries exist; this is a learning experience for me, not a means to an end.
I have defined the following (simplified, for this question) constructs:
/* Raw sample storage: one pointer member per supported sample type. */
union img_rawdata{
uint8_t* u8;
uint16_t* u16;
float* flt;
};
/* Tag naming which sample type an image holds. */
enum img_type{
IMG_U8,
IMG_U16,
IMG_FLT
};
/* An image: its sample buffer, the tag for that buffer's type, and a value count. */
struct image{
union img_rawdata rawdata;
enum img_type type;
unsigned int num_values;
};
My question is this: What is the favored way to dynamically allocate the proper pointer within the union?
Right now, the only way I see is to use a switch statement, like:
/*
 * Allocate room for num_vals samples of the requested type and store the
 * pointer in the matching member of img->rawdata. Tags other than the three
 * listed leave img untouched. The malloc result is stored unchecked.
 */
void alloc_img(struct image* img, enum img_type type, unsigned int num_vals){
    if (type == IMG_U8) {
        img->rawdata.u8 = (uint8_t*)malloc(num_vals*sizeof(uint8_t));
    } else if (type == IMG_U16) {
        img->rawdata.u16 = (uint16_t*)malloc(num_vals*sizeof(uint16_t));
    } else if (type == IMG_FLT) {
        img->rawdata.flt = (float*)malloc(num_vals*sizeof(float));
    }
}
This doesn't seem so bad; however, in my implementation, the actual memory allocation is about 50 lines (as rawdata is not one dimensional, error checking, etc.).
Is there any preprocessor magic that can reduce code redundancy, or is this the best way to go about writing this?
Or, alternatively, is there a different way altogether to approach the problem that will avoid this issue entirely?

[assuming all types of pointers including void * have the same size]
Modify what you have like so
/* Raw sample storage: typed views plus an untyped view of the same pointer. */
union img_rawdata {
/* Type-agnostic member, used to store an allocation without naming the sample type. */
void * untyped;
uint8_t * u8;
uint16_t * u16;
float * flt;
};
/*
 * Sample-type tag. Valid tags are contiguous from 0 so they can index a
 * table; IMG_UNDEF and IMG_MAX bracket the valid range.
 */
enum img_type {
    IMG_UNDEF = -1,  /* sentinel below the valid range */
    IMG_U8 = 0,
    IMG_U16,
    IMG_FLT,
    IMG_MAX          /* number of valid tags; one past the last valid tag */
};
and add
const size_t img_type_size[IMG_MAX] = {
sizeof(*((union img_rawdata *) NULL)->u8),
sizeof(*((union img_rawdata *) NULL)->u16),
sizeof(*((union img_rawdata *) NULL)->flt)
};
then replace the switch by:
/* Reject tags outside the table's index range before indexing it. */
assert(IMG_UNDEF < type && IMG_MAX > type);
/* One allocation serves every sample type: the element size comes from the table. */
img->rawdata.untyped = malloc(num_vals * img_type_size[type]);

/*
 * Allocate num_vals elements of the sample type named by `type` and store the
 * pointer in img->rawdata. An unrecognised tag allocates nothing and writes a
 * diagnostic to stderr.
 */
void alloc_img(struct image * img, enum img_type type, unsigned int num_vals){
    /* 0 marks "tag not recognised". */
    const size_t elem_size =
        (type == IMG_U8)  ? sizeof(uint8_t)  :
        (type == IMG_U16) ? sizeof(uint16_t) :
        (type == IMG_FLT) ? sizeof(float)    : 0;

    if (!elem_size) {
        fprintf(stderr, "default missing in switch\n");
        return;
    }
    // assigning to u8 is the same as assigning to any other member
    // but it may be better to use a void* as in
    // img->rawdata.voidptr = malloc(num_vals * basesize);
    img->rawdata.u8 = malloc(num_vals * elem_size);
}

Related

What are for square brackets in an addressed-of pointer?

I've been searching for a while what are for square-brackets in an addressed-of pointer, but I continue without understanding it. Here are the lines of the function, where "id" variable is an uint32_t pointer that has been previously passed as an argument.
#define LIST_STARTED (0x0001) /*defined out of the function*/
#define LIST_FIRST (0x0002) /*defined out of the function*/
uint32_t *pointer = id;
uint16_t *flags = &((uint16_t *)pointer)[0];
uint16_t *index = &((uint16_t *)pointer)[1];
bool started = *flags & LIST_STARTED;
bool first = *flags & LIST_FIRST;
if (!started){
/* does something */
*flags = MSEC_PRM_MGMT_LIST_STARTED;
return true;
}
if (!first){
/* does something */
*flags |= MSEC_PRM_MGMT_LIST_FIRST;
*index = 1;
return true;
}
if (*index == final_index)
/* does something */
return false;
*index += 1;
I understand the overall logic of the function, but I don't understand what the following lines do. I included "all" of the code above in case it helps.
uint16_t *flags = &((uint16_t *)pointer)[0];
uint16_t *index = &((uint16_t *)pointer)[1];
I would appreciate if someone could help me!
Thank you!
I edit to say that this C code works fine in an Embedded System, I'm not modifying it, I was just watching its behaviour.
The following code tries to read a uint32_t object as an array of two uint16_t objects:
uint32_t *id = ...;
uint32_t *pointer = id;
uint16_t *flags = &((uint16_t *)pointer)[0];
uint16_t *index = &((uint16_t *)pointer)[1];
and that is undefined behaviour to read a uint32_t object as 2 uint16_t objects through flags and index pointers because that breaks strict aliasing rules.
The correct way is:
uint16_t flags = *id; // The lowest order bits of id.
uint16_t index = *id >> 16; // The highest order bits of id.
In the above assignments of uint32_t to uint16_t it truncates the highest order bits of id.
uint32_t *pointer = id;
uint16_t *flags = &((uint16_t *)pointer)[0];
it is an equivalent of.
uint32_t *pointer = id;
uint16_t *flags = (uint16_t *)pointer;
The definition:
uint16_t *index = &((uint16_t *)pointer)[1];
Is an equivalent of:
uint16_t *temp = (uint16_t *)pointer;
uint16_t *index = temp + 1;
//or
uint16_t *index = &temp[1];
This is called pointer punning, and it is considered dangerous and not portable.
You can use unions for safe punning (at least when using gcc or its derivatives)
/*
 * Overlay of one 64-bit value with narrower views of the same bytes:
 * 2 x 32-bit, 4 x 16-bit, 8 x 8-bit, and 8 pairs of 4-bit fields.
 */
typedef union
{
uint64_t u64;
uint32_t u32[2];
uint16_t u16[4];
uint8_t u8[8];
/* Two 4-bit fields per element; valid indices are 0..7. */
struct
{
uint8_t n1: 4;
uint8_t n2: 4;
}u4[8];
}union_pune_t;
uint16_t foo16(uint32_t *p32)
{
union_pune_t *d64 = (void *)p32;
return d64 -> u16[1];
}
uint8_t foo8(uint32_t *p32)
{
union_pune_t *d64 = (void *)p32;
return d64 -> u8[5];
}
/*
 * Return one 4-bit field of the storage at p32, viewed through union_pune_t.
 *
 * u4 has 8 elements (indices 0..7), so the previous u4[9] read past the end of
 * the union. NOTE(review): this assumes the intent was "nibble 9" counting
 * from zero, which is the n2 field of byte 4 -- confirm the intended nibble.
 * The caller must point at storage at least as large as the union.
 */
uint8_t foon4(uint32_t *p32)
{
    union_pune_t *d64 = (void *)p32;
    return d64 -> u4[4].n2;
}

Is there a way to define a "common" structure for multiple parameter numbers and types

I would like to create a common structure that I may use to pass parameters of multiple lengths and types into different functions.
As an example, consider the following structure:
/* Describes one operation: its ID, a format string, and a parameter count. */
typedef struct _list_t {
int ID;
char *fmt;
int nparams;
} list_t;
list_t infoList[100]; //this will be pre-populated with the operations my app offers
/* A request: the operation ID plus its parameters serialised as one string. */
typedef struct _common {
int ID;
char *params;
} common;
A variable size function is used to pass in the parameters given the format is already populated:
/*
 * Format the variadic arguments with the format string of c->ID and store a
 * heap copy of the resulting text in c->params (the caller frees it).
 *
 * Returns 0 on success. Returns -1 with c->params set to NULL if formatting
 * fails, the text does not fit the 100-byte staging buffer, or allocation
 * fails.
 */
int Vfunc(common * c, ...) {
    va_list args;
    va_start(args, c);
    //code to search for ID in infoList and fetch its fmt
    char params_buff[100]; //max params is 100
    /* Bounded formatting into the buffer that is actually declared here. */
    int len = vsnprintf(params_buff, sizeof params_buff, fmt, args);
    va_end(args);

    c->params = NULL;
    if (len < 0 || (size_t)len >= sizeof params_buff) {
        return -1;
    }
    c->params = malloc((size_t)len + 1);
    if (c->params == NULL) {
        return -1;
    }
    memcpy(c->params, params_buff, (size_t)len + 1); /* includes the terminator */
    return 0;
}
/*
 * Run the operation selected by c->ID on the parameters stored in c->params.
 * ID 1 parses two integers and returns their sum. Returns 0 when the
 * parameters cannot be parsed or the ID has no implementation yet.
 */
int execute(common * c) {
    if (c->ID == 1) { //add 2 numbers
        int x, y; // i expect 2 numbers
        //code to find ID in infoList and fetch its fmt
        if (sscanf(c->params, fmt, &x, &y) != 2) {
            return 0; /* never read x/y when they were not both assigned */
        }
        return (x + y);
    }
    else if (c->ID == 2) {
        //do another operation, i expect an unsigned char array?
    }
    /* Every path of a non-void function must return a value. */
    return 0;
}
Main program will look somewhat like this:
/* Demo: build a request for operation 1 with arguments 12 and 2, then run it. */
int main()
{
    common c;

    c.ID = 1;
    (void)Vfunc(&c, 12, 2);
    (void)execute(&c);
    return 0;
}
Now I can pass in the structure to any function, which will deal with the parameters appropriately. However I do not see a way to have unsigned char[] as one of the parameters since unsigned char arrays do not have a "format". The format of a char[] would be %s. Basically I want to pass in some raw data through this structure.
Is there a way to do this or a better implementation to fulfill the goal?
EDIT:
It seems that the questions goal is unclear. Say my application can provide arithmetic operations (like a calculator). Say a user of my application wants to add 2 numbers. All I want them to do is fill in this common structure, then pass it into lets say a function to get it executed. All the ID's of the operations will be known from lets say a manual, so the user will know how many parameters they can pass and what ID does what. As the app owner i will be filling infoList with the ID's I offer.
So this is just to give you an idea of what I mean by a "common structure". It can be implemented in other ways too, maybe you have a better way. But my goal is for the implementation to be able to pass in an unsigned char array. Can I do that?
As I understand your question, you want to save all argument values in a text string so that the values can be reconstructed later using sscanf. Further, you want to be able to handle array-of-number, e.g. an array of unsigned char.
And you ask:
Is there a way to do this
For your idea to work, it's required that sscanf can parse (aka match) the type of data that you want to use in your program. And - as you write in the question - scanf can't parse arrays-of-numbers. So the answer is:
No, it can't be done with standard functions.
So if you want to be able to handle arrays-of-number, you'll have to write your own scan-function. This includes
selection of a conversion specifier to tell the code to scan for an array (e.g. %b),
select a text-format for the array (e.g. "{1, 2, 3}")
a way to store the array data and its size, e.g. struct {unsigned char* p; size_t nb_elements;}.
Further, you'll have the same problem with vsprintf. Again you need to write your own function.
EDIT
One alternative (that I don't really like myself) is to store pointer values. That is - instead of storing the array values in the string params you can store a pointer to the array.
The upside of that approach is that you can use the standard functions.
The downside is that the caller must ensure that the array exists until execute has been called.
In other words:
unsigned char auc[] = {1, 2, 3};
Vfunc(&c, auc);
execute(&c);
would be fine but
Vfunc(&c, (unsigned char[]){1, 2, 3});
execute(&c);
would compile but fail at run time.
And - as always with arrays in C - you may need an extra argument for the number of array elements.
Example code for this "save as pointer" approach could be:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
/* A request: the operation ID plus its parameters serialised as one string. */
typedef struct _common {
int ID;
char *params; /* heap string written by Vfunc, parsed back by execute */
} common;
/*
 * Serialise the variadic arguments (here: a pointer and an element count)
 * into text and store a heap copy in c->params (the caller frees it).
 * On formatting failure, truncation, or allocation failure c->params is NULL.
 */
void Vfunc(common * c, ...) {
    //code to search for ID in infoList and fetch its fmt
    // For this example just use a fixed fmt
    char fmt[] ="%p %zu";
    char params_buff[100]; //max params is 100

    va_list args;
    va_start(args, c);
    /* Bounded: never writes past params_buff, unlike vsprintf. */
    int len = vsnprintf(params_buff, sizeof params_buff, fmt, args);
    va_end(args);

    c->params = NULL;
    if (len < 0 || (size_t)len >= sizeof params_buff) {
        return;
    }
    c->params = malloc((size_t)len + 1);
    if (c->params != NULL) {
        memcpy(c->params, params_buff, (size_t)len + 1); /* includes the terminator */
    }
}
/*
 * Run the operation selected by c->ID. ID 1 reads a pointer and an element
 * count back out of c->params and returns the sum of that many bytes at the
 * pointer. Exits with status 1 if the parameters cannot be parsed. Returns 0
 * for any other ID.
 */
int execute(common * c)
{
    if (c->ID == 1) {
        // expect pointer and number of array elements
        void *raw = NULL; /* %p stores through a void **, not an unsigned char ** */
        size_t nbe = 0;
        //code to find ID in infoList and fetch its fmt
        // For this example just use a fixed fmt
        char fmt[] ="%p %zu";
        if (sscanf(c->params, fmt, &raw, &nbe) != 2) exit(1);
        const unsigned char *a = raw;
        // Calculate sum
        int sum = 0;
        for (size_t i = 0; i < nbe; ++i) sum += a[i];
        return sum;
    }
    return 0;
}
/* Demo: store a pointer to a byte array in a request, print it, then sum the array. */
int main(void)
{
    unsigned char auc[] = {1, 2, 3, 4, 5, 6};
    const size_t count = sizeof auc / sizeof auc[0];
    common c;

    c.ID = 1;
    Vfunc(&c, auc, count);
    printf("The saved params is \"%s\"\n", c.params);

    const int total = execute(&c);
    printf("Sum of array elements are %d\n", total);
    return 0;
}
Possible Output
The saved params is "0xffffcc0a 6"
Sum of array elements are 21
Notice that it's not the array data that is saved but a pointer value.
I have read the problem once again, and find out that it's much simpler than you've described.
According to your statement, you already know about the type and order of data retrieval in execute() function. This make this problem much easier.
I must say, this problem is a bit difficult to solve in c, cause c can't resolve type at runtime or dynamically cast type at runtime. c must know all the types before hand i.e. at compile time.
Now, that said, c provides a way to handle variable length arguments. And that's a advantage.
So, what we've to do is:
cache all arguments from variable length arguments i.e. va_list.
and, provide a way to retrieve provided arguments from that cache.
First, I am going to show you how to retrieve elements from the cache when you know their type. We'll do it with a macro, which I've named sarah_next() (after you, since you prompted me to write it); you can name it whatever you like. Its definition is given below:
/*
 * sarah_next(cache, type): yield the `type` value stored at `cache` and
 * advance `cache` past it by sizeof(type). `cache` must be a modifiable
 * pointer lvalue (void * or char *); it is evaluated more than once.
 * The arithmetic is done on char * because arithmetic on void * is a
 * compiler extension, not standard C.
 */
#define sarah_next(cache, type) \
    (((cache) = (void *) ((char *) (cache) + sizeof(type))), \
     *((type *) (void *) ((char *) (cache) - sizeof(type))))
So, in simple words, sarah_next() retrieve the next element from cache and cast it to type.
Now, let's discuss the first problem, where we've to cache all arguments from va_list. You can do it easily by writing as follows:
void *cache = malloc(sizeof(char) * cacheSize);
// itr is an iterator, which iterates over cache
char *itr = (char *)cache;
// now, you can do
*(type *)itr = va_arg(buf, type);
// and then
itr += sizeof(type);
Another, point I would like to discuss is, I've used type hint to determine cache size. For that I've used a function getSize(). You would understand if you just look at it(also note: this gives you the ability to use your own custom type):
// getSize() is a function that returns type size based on type hint
/*
 * Map a one-character type hint to the size in bytes of the type it stands for.
 * Prints an error and exits with status 1 when the hint is unknown.
 */
size_t getSize(char type) {
    switch (type) {
    case 's':
        return sizeof(char *);
    case 'c':
        return sizeof(char);
    case 'i':
        return sizeof(int);
    case 'u': // 'u' represents 'unsigned char'
        return sizeof(unsigned char);
    case 'x': // let's, say 'x' represents 'unsigned char *'
        return sizeof(unsigned char *);
    default:
        // you can add your own custom type above
        break;
    }
    // if type is not recognized, then
    printf("error: unknown type while trying to retrieve type size\n");
    exit(1);
}
Ok, I guess, the ideas are complete. Before moving on try to grasp the ideas properly.
Now, let me provide the full source code:
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
// note: it is the most fundamental part of this solution
// 'sarah_next' is a macro, that
// returns *(type *)buf means a value of type "type", and also
// increments 'buf' by 'sizeof(type)', so that
// it may target next element
// 'sarah_next' is used to retrieve data from task cache
// I've named it after you, you may choose to name it as you wish
// `cache` must be a modifiable pointer lvalue (void * or char *); it is
// evaluated more than once. The arithmetic is done on char * because
// arithmetic on void * is a compiler extension, not standard C.
#define sarah_next(cache, type) \
    (((cache) = (void *) ((char *) (cache) + sizeof(type))), \
     *((type *) (void *) ((char *) (cache) - sizeof(type))))
// defining pool size for task pool
#define POOL_SIZE 1024
// notice: getSize() has been updated to support unsigned char and unsigned char *
// getSize() is a function that returns type size based on type hint
size_t getSize(char type) {
    // one case per supported type hint; add custom types as new cases
    switch (type) {
    case 's':
        return sizeof(char *);
    case 'c':
        return sizeof(char);
    case 'i':
        return sizeof(int);
    case 'u': // 'u' represents 'unsigned char'
        return sizeof(unsigned char);
    case 'x': // let's, say 'x' represents 'unsigned char *'
        return sizeof(unsigned char *);
    default:
        break;
    }
    // if type is not recognized, then
    printf("error: unknown type while trying to retrieve type size\n");
    exit(1);
}
// A task: an operation id plus a heap buffer holding its arguments back to back
typedef struct __task {
int id;
void *cache; // allocated in constructTask, released in destroyTask
} Task;
// notice: constructTask has been updated to support unsigned char and unsigned char *
// note: here, types contains type hint
// Build a task from an id, a string of type hints, and one variadic argument
// per hint. The arguments are copied back to back (no padding) into a heap
// cache in hint order. Returns NULL if memory cannot be allocated; release
// the result with destroyTask().
Task *constructTask(int id, char *types, ...) {
    // determine the size of task cache
    size_t cacheSize = 0;
    for (size_t i = 0; types[i]; i++) {
        cacheSize += getSize(types[i]);
    }
    // allocate memory for task cache; malloc(0) may return NULL, so always
    // request at least one byte to keep NULL meaning "out of memory"
    char *cache = malloc(cacheSize ? cacheSize : 1);
    if (cache == NULL) {
        return NULL;
    }
    Task *task = malloc(sizeof *task);
    if (task == NULL) {
        free(cache);
        return NULL;
    }

    va_list buf;
    va_start(buf, types);
    // itr is an iterator, which iterates over cache
    char *itr = cache;
    for (size_t i = 0; types[i]; i++) {
        // Values are stored with memcpy because the packed layout does not
        // guarantee that itr is suitably aligned for a direct typed store.
        switch (types[i]) {
        case 's': {
            char *v = va_arg(buf, char *);
            memcpy(itr, &v, sizeof v);
            break;
        }
        case 'x': { // 'unsigned char *'
            unsigned char *v = va_arg(buf, unsigned char *);
            memcpy(itr, &v, sizeof v);
            break;
        }
        case 'c': {
            // 'char' is promoted to 'int' when passed through '...'
            char v = (char)va_arg(buf, int);
            memcpy(itr, &v, sizeof v);
            break;
        }
        case 'u': {
            // 'unsigned char' is promoted to 'int' when passed through '...'
            unsigned char v = (unsigned char)va_arg(buf, int);
            memcpy(itr, &v, sizeof v);
            break;
        }
        case 'i': {
            int v = va_arg(buf, int);
            memcpy(itr, &v, sizeof v);
            break;
        }
        default:
            // unreachable: getSize() already exited on an unknown hint
            break;
        }
        itr += getSize(types[i]);
    }
    va_end(buf);

    // now, construct task
    task->id = id;
    task->cache = cache;
    // and return it
    return task;
}
// destroyTask is a function that frees memory of task cache and task
// Passing NULL is a no-op, mirroring free().
void destroyTask(Task *task) {
    if (task == NULL) {
        return;
    }
    free(task->cache);
    free(task);
}
// notice: that 'task->id == 4' processing part
// it is equivalant to your 'execute()' function
// Decode the task's cached arguments according to task->id and perform the
// matching operation. Returns the operation's result, or 999 when the id is
// not recognized. The task itself is left unmodified.
int taskProcessor(Task *task) {
    // define ret i.e. return value
    int ret = 999; // by default it is some code value, that says error
    // Read through a local cursor: sarah_next() advances the pointer it is
    // given, and task->cache must keep the address malloc returned so that
    // destroyTask() can free it.
    char *cursor = task->cache;
    // note: you already know, what type is required in a task
    if(task->id == 1) {
        // note: see usage of 'sarah_next()'
        int x = sarah_next(cursor, int);
        int y = sarah_next(cursor, int);
        ret = x + y;
    } else if(task->id == 2) {
        char *name = sarah_next(cursor, char *);
        if(strcmp(name, "sarah") == 0) {
            ret = 0; // first name
        } else if (strcmp(name, "cartenz") == 0) {
            ret = 1; // last name
        } else {
            ret = -1; // name not matched
        }
    } else if(task->id == 3) {
        int x = sarah_next(cursor, int);
        char *name = sarah_next(cursor, char *);
        int y = sarah_next(cursor, int);
        printf("%d %s %d\n", x, name, y); // both string (char *) and int are retrieved
        ret = x + y;
    } else if(task->id == 4) { // working with 'unsigned char *'
        int a = sarah_next(cursor, int);
        unsigned char *x = sarah_next(cursor, unsigned char *);
        int b = sarah_next(cursor, int);
        printf("working with 'unsigned char *':");
        for(int i=0; x[i]; i++) {
            printf(" %d", x[i]); // printed as integers to check the stored values
        }
        printf("\n");
        ret = a + b;
    } else {
        printf("task id not recognized\n");
    }
    return ret;
}
// Demo driver: build seven tasks, run each through taskProcessor and print
// its result, then destroy every task.
int main() {
Task *taskPool[POOL_SIZE];
int taskCnt = 0;
taskPool[taskCnt++] = constructTask(1, "ii", 20, 30); // it would return 50
taskPool[taskCnt++] = constructTask(1, "ii", 50, 70); // it would return 120
taskPool[taskCnt++] = constructTask(2, "s", "sarah"); // it would return 0
taskPool[taskCnt++] = constructTask(2, "s", "cartenz"); // it would return 1
taskPool[taskCnt++] = constructTask(2, "s", "reyad"); // it would return -1
taskPool[taskCnt++] = constructTask(3, "isi", 40, "sarah", 60); // it would print [40 sarah 60] and return 100
// notice: I've added an example to showcase the use of unsigned char *
// also notice: i'm using value greater than 127, cause
// in most compiler(those treat char as signed) char supports only upto 127
unsigned char x[] = {231, 245, 120, 255, 0}; // 0 is for passing 'NULL CHAR' at the end of string
// 'x' is used to represent 'unsigned char *'
taskPool[taskCnt++] = constructTask(4, "ixi", 33, x, 789); // it would print ['working with unsigned char *': 231 245 120 255] and return 822
// note: if you used 'char *' cast to retrieve from 'cache'(using a compiler which treats char as signed), then
// it would print [-25 -11 120 -1] instead of [231 245 120 255]
// i guess, that makes it clear that you can perfectly use 'unsigned char *'
for(int i=0; i<taskCnt; i++) {
printf("task(%d): %d\n", i+1, taskProcessor(taskPool[i]));
printf("\n");
}
// at last destroy all tasks
for(int i=0; i<taskCnt; i++) {
destroyTask(taskPool[i]);
}
return 0;
}
The output is:
// notice the updated output
task(1): 50
task(2): 120
task(3): 0
task(4): 1
task(5): -1
40 sarah 60
task(6): 100
working with 'unsigned char *': 231 245 120 255
task(7): 822
So, you may be wondering what advantage this has over your solution. First, you don't have to use %s, %d, etc. to describe a format: such format strings are not easy to create or change, and you may have to write a different fmt for each task. You also don't have to use vsprintf and friends, which only deal with built-in types.
And the second and great point is, you can use your own custom type. Declare a struct type of your own and you can use. And it is also easy to add new type.
update:
I've forgot to mention another advantage, you can also use unsigned char with it. see, the updated getSize() function. you can use 'u' symbol for unsigned char, and as unsigned char is promoted to int, you can just cast it to (unsigned char) and done...
update-2(support for unsigned char *):
I have updated the code to support unsigned char and unsigned char *. To support new type, the functions, those you need to update, are getSize() and constructTask(). Compare the previous code and the newly updated code...you'll understand how to add new types(you can also add custom types of your own).
Also, take a look at task->id == 4 part in taskProcessor() function. I've added this to showcase the usage of unsigned char *. Hope this clears everything.
If you've any question, then ask me in the comment...
I think what you want from your description is likely a union of structs, with the 1st member of the union being an enumerator that defines the type of the structure in use, which is how polymorphism is often accomplished in C. Look at the X11 headers for a giant example of Xevents
A trivial example:
//define our types
/* Tag identifying which animal struct a value holds. */
typedef enum {
chicken,
cow,
no_animals_defined
} animal;
/* Each variant struct starts with the tag so it can be read through the union's species member. */
typedef struct {
animal species;
int foo;
char bar[20];
} s_chicken;
typedef struct {
animal species;
double foo;
double chew;
char bar[20];
} s_cow;
/* Union of all variants; species overlays the leading tag of every variant. */
typedef union {
animal species; // we need this so the receiving function can identify the type.
s_chicken chicken ;
s_cow cow ;
} s_any_species;
Now this union may be passed to a function and take on either identity. A function receiving an s_any_species can check the species member to decide which variant to access:
/*
 * Dispatch on the tag: for a chicken, set its foo field to 1.
 * The argument is passed by value, so the write lands in the local copy.
 */
void myfunc (s_any_species any_species)
{
    switch (any_species.species) {
    case chicken:
        any_species.chicken.foo = 1;
        break;
    default:
        break;
    }
}
Arrays of function pointers are preferable to long if else sequences here, but either will work
I take you to be asking about conveying a sequence of objects of varying type to a function. As a special detail, you want the function to receive only one actual argument, but this is not particularly significant because it is always possible to convert a function that takes multiple arguments into another that takes only one by wrapping the multiple parameters in a corresponding structure. I furthermore take the example Vfunc() code's usage of vsprintf() to be an implementation detail, as opposed to an essential component of the required solution.
In that case, notwithstanding my grave doubts about the usefulness of what you seem to want, as a C programming exercise it does not appear to be that difficult. The basic idea you seem to be looking for is called a tagged union. It goes by other names, too, but that one matches up well with the relevant C-language concepts and keywords. The central idea is that you define a type that can hold objects of various other types, one at a time, and that carries an additional member that identifies which type each instance currently holds.
For example:
/* Identifies which variant of union tagged is currently stored. */
enum tag { TAG_INT, TAG_DOUBLE, TAG_CHAR_PTR };
/* Tagged union: every variant begins with the tag, followed by that variant's payload. */
union tagged {
struct {
enum tag tag;
// no data -- this explicitly gives generic access to the tag
} as_any;
struct {
enum tag tag;
int data;
} as_int;
struct {
enum tag tag;
double data;
} as_double;
struct {
enum tag tag;
char *data;
} as_char_ptr;
// etc.
};
You could then combine that with a simple list wrapper:
/* A counted argument list: num entries stored at args. */
struct arg_list {
unsigned num;
union tagged *args;
};
Then, given a function such as this:
/*
 * Format d with "%15.7e" and compare s against that text.
 * Returns the strcmp() result: 0 when s equals the formatted text.
 */
int foo(char *s, double d) {
    char buffer[16]; /* array declarator goes after the name */
    /* Bounded write: longer output is truncated instead of overflowing. */
    snprintf(buffer, sizeof buffer, "%15.7e", d);
    return strcmp(s, buffer);
}
You might then wrap it like so:
/*
 * Adapter: unpack a (char *, double) pair from the argument list, call foo(),
 * and return its int result as a TAG_INT tagged value.
 */
union tagged foo_wrapper(struct arg_list args) {
    // ... validate argument count and types ...
    /* The elements live in args.args; args itself is a struct, not an array. */
    return (union tagged) { .as_int = {
        .tag = TAG_INT,
        .data = foo(args.args[0].as_char_ptr.data, args.args[1].as_double.data)
    } };
}
and call the wrapper like so:
void demo_foo_wrapper() {
union tagged arg_unions[2] = {
{ .as_char_ptr = { .tag = TAG_CHAR_PTR, .data = "0.0000000e+00" },
{ .as_double = { .tag = TAG_DOUBLE, .data = 0.0 }
};
union tagged result = foo_wrapper((struct arg_list) { .num = 2, .args = arg_unions});
printf("result: %d\n", result.as_int.data);
}
Update:
I suggested tagged unions because the tags correspond to the field directives in the format strings described in the question, but if they aren't useful to you in practice then they are not an essential detail of this approach. If the called functions will work on the assumption that the caller has packed arguments correctly, and you have no other use for tagging the data with their types, then you can substitute a simpler, plain union for a tagged union:
/* Untagged variant value: the reader must already know which member was stored. */
union varying {
int as_int;
double as_double;
char *as_char_ptr;
// etc.
};
/* A counted argument list: num entries stored at args. */
struct arg_list {
unsigned num;
union varying *args;
};
union varying foo_wrapper(struct arg_list args) {
return (union vaying) { .as_int = foo(args[0].as_char_ptr, args[1].as_double) };
}
/* Demo: pack a string and a double as plain union arguments, call the wrapper, print the int result. */
void demo_foo_wrapper() {
    /* Each array element needs its own braces around the member designator. */
    union varying arg_unions[2] = {
        { .as_char_ptr = "0.0000000e+00" },
        { .as_double = 0.0 }
    };
    union varying result = foo_wrapper((struct arg_list) { .num = 2, .args = arg_unions});
    printf("result: %d\n", result.as_int);
}

Practical continuation passing style in C?

I have been writing a lot of C code recently, and I've been running into a problem similar to the one I ran into with Go where I have a lot of code that looks like this:
if (foo() != 0) {
return -1;
}
bar();
which is similar to the constant if err != nil checks I see in Golang. I think I've come up with an interesting pattern for dealing with these error-prone sequences. I was inspired by functional languages that have andThen sequences for chaining together computations which may or may not succeed. I tried implementing a naive callback setup, but I realized this is practically impossible in C without lambdas, and it would be callback hell even with them. Then I thought about using jump, and I realized there might be a good way to do it. The interesting part is below. Without using this pattern, there would be a lot of if (Buffer_strcpy(...) != 0) checks or a mess of callback hell.
switch (setjmp(reference)) {
    case -1:
        // error branch
        buffer->offset = offset;
        Continuation_error(continuation, NULL);
        // if the caller's continuation returns, do not fall into action 0
        return;
    case 0:
        // action 0
        Buffer_strcpy(buffer, "(", andThenContinuation);
    case 1:
        // action 1 (only called if action 0 succeeds)
        Node_toString(binaryNode->left, buffer, andThenContinuation);
    case 2:
        Buffer_strcpy(buffer, " ", andThenContinuation);
    case 3:
        Node_toString(binaryNode->right, buffer, andThenContinuation);
    case 4:
        Buffer_strcpy(buffer, ")", andThenContinuation);
    case 5:
        Continuation_success(continuation, buffer->data + offset);
}
And here is a self-contained program which runs it:
#include <string.h>
#include <stdio.h>
#include <setjmp.h>
/*
* A continuation is similar to a Promise in JavaScript.
* - success(result)
* - error(result)
*/
struct Continuation;
/*
* The ContinuationVTable is essentially the interface: one handler for
* success and one for error, each given the continuation and a result pointer.
*/
typedef struct {
void (*success)(struct Continuation *, void *);
void (*error)(struct Continuation *, void *);
} ContinuationVTable;
/*
* And the Continuation is the abstract class: it holds only the pointer to
* its handler table.
*/
typedef struct Continuation {
const ContinuationVTable *vptr;
} Continuation;
/* Dispatch to the continuation's success handler. */
void Continuation_success(Continuation *continuation, void *result) {
    const ContinuationVTable *table = continuation->vptr;
    table->success(continuation, result);
}
/* Dispatch to the continuation's error handler. */
void Continuation_error(Continuation *continuation, void *result) {
    const ContinuationVTable *table = continuation->vptr;
    table->error(continuation, result);
}
/*
* This is the "Promise" implementation we're interested in right now because it makes it easy to
* chain together conditional computations (those that should only proceed when upstream
* computations succeed).
*/
/* Continuation that resumes a setjmp point with the next step index, or -1 on error. */
typedef struct {
// Superclass (this way the vptr will be in the expected spot when we cast this class)
Continuation super;
// Stores a reference to the big struct which contains environment context (basically a bunch
// of registers). This context is pretty similar to the context that you'd need to preserve
// during a function call.
jmp_buf *context;
// Allow computations to return a result. May be NULL, in which case results are dropped.
void **result;
// The sequence index in the chain of computations.
int index;
} AndThenContinuation;
/*
 * Success handler: store the result if a result slot was supplied, advance
 * the step index, and jump back to the saved context with the new index.
 * Does not return.
 */
void AndThenContinuation_success(Continuation *continuation, void *result) {
    AndThenContinuation *self = (AndThenContinuation *) continuation;
    void **slot = self->result;
    if (slot != NULL) {
        *slot = result;
    }
    self->index += 1;
    longjmp(*self->context, self->index);
}
/*
 * Error handler: store the result if a result slot was supplied, then jump
 * back to the saved context with the value -1. Does not return.
 */
void AndThenContinuation_error(Continuation *continuation, void *result) {
    AndThenContinuation *self = (AndThenContinuation *) continuation;
    void **slot = self->result;
    if (slot != NULL) {
        *slot = result;
    }
    longjmp(*self->context, -1);
}
const ContinuationVTable andThenContinuationVTable = (ContinuationVTable) {
.success = AndThenContinuation_success,
.error = AndThenContinuation_error,
};
/*
 * Prepare an AndThenContinuation: bind it to the jump context and the
 * optional result slot, install its handler table, and start at step 0.
 */
void AndThenContinuation_init(AndThenContinuation *continuation, jmp_buf *context, void **result) {
    continuation->context = context;
    continuation->result = result;
    continuation->index = 0;
    continuation->super.vptr = &andThenContinuationVTable;
}
This part is an example of its use:
/*
* I defined a buffer class here which has methods to write to the buffer, which might fail if the
* buffer is out of bounds.
*/
/* Bounded text buffer: storage, the current write position, and the total size. */
typedef struct {
char *data;
size_t offset;
size_t capacity;
} Buffer;
/*
 * Append the string src (with its terminator) at the buffer's current
 * offset. On success the offset advances by the string length, so the next
 * write overwrites the terminator, and the success continuation is invoked.
 * If the string plus terminator does not fit, the buffer is left unchanged
 * and the error continuation is invoked.
 */
void Buffer_strcpy(Buffer *buffer, const void *src, Continuation *continuation) {
    const size_t needed = strlen(src) + 1;
    if (buffer->offset + needed <= buffer->capacity) {
        memcpy(buffer->data + buffer->offset, src, needed);
        buffer->offset += needed - 1; // don't count null character
        Continuation_success(continuation, NULL);
        return;
    }
    Continuation_error(continuation, NULL);
}
/*
* A Node is just something with a toString method.
*/
struct NodeVTable;
/* Base node: holds only the pointer to its method table. */
typedef struct {
struct NodeVTable *vptr;
} Node;
/* Method table for nodes: a single toString operation. */
typedef struct NodeVTable {
void (*toString)(Node *, Buffer *, Continuation *);
} NodeVTable;
/* Dispatch toString through the node's method table. */
void Node_toString(Node *node, Buffer *buffer, Continuation *continuation) {
    struct NodeVTable *table = node->vptr;
    table->toString(node, buffer, continuation);
}
/*
* A leaf node is just a node which copies its name to the buffer when toString is called.
*/
/* Leaf node: the base node (first, so a Node * can be cast back) plus its name. */
typedef struct {
Node super;
char *name;
} LeafNode;
/* toString for leaves: append the leaf's name to the buffer. */
void LeafNode_toString(Node *node, Buffer *buffer, Continuation *continuation) {
    Buffer_strcpy(buffer, ((LeafNode *) node)->name, continuation);
}
NodeVTable leafNodeVTable = (NodeVTable) {
.toString = LeafNode_toString,
};
/* Set up a leaf: record its name (the pointer is stored, not copied) and install the leaf method table. */
void LeafNode_init(LeafNode *node, char *name) {
    node->name = name;
    node->super.vptr = &leafNodeVTable;
}
/*
* A binary node is a node whose toString method should simply return
* `(${toString(left)} ${toString(right)})`. However, we use the continuation construct because
* those toString calls may fail if the buffer has insufficient capacity.
*/
/* Binary node: the base node (first, so a Node * can be cast back) plus two children. */
typedef struct {
Node super;
Node *left;
Node *right;
} BinaryNode;
/*
 * toString for binary nodes: write "(", the left child, " ", the right child
 * and ")" into the buffer, then report success with a pointer to where this
 * node's text starts. If any step fails, restore the buffer offset and report
 * the error to the caller's continuation.
 *
 * Each step is handed a local AndThenContinuation, whose handlers longjmp
 * back to the setjmp below with the index of the next step (or -1 on error),
 * so a step never returns here normally.
 *
 * NOTE(review): andThen.index is modified between setjmp and longjmp and read
 * again afterwards; the C standard leaves non-volatile locals changed in that
 * window indeterminate -- confirm behaviour on the target compilers.
 */
void BinaryNode_toString(Node *node, Buffer *buffer, Continuation *continuation) {
    BinaryNode *binaryNode = (BinaryNode *) node;
    jmp_buf reference;
    AndThenContinuation andThen;
    AndThenContinuation_init(&andThen, &reference, NULL);
    Continuation *andThenContinuation = (Continuation *) &andThen;
    /*
     * This is where the magic happens. The -1 branch is where errors are handled. The 0 branch is
     * for the initial computation. Subsequent branches are for downstream computations.
     */
    size_t offset = buffer->offset;
    switch (setjmp(reference)) {
        case -1:
            // error branch
            buffer->offset = offset;
            Continuation_error(continuation, NULL);
            // If the caller's continuation returns instead of jumping away,
            // stop here rather than falling into action 0 and restarting.
            return;
        case 0:
            // action 0
            Buffer_strcpy(buffer, "(", andThenContinuation);
        case 1:
            // action 1 (only called if action 0 succeeds)
            Node_toString(binaryNode->left, buffer, andThenContinuation);
        case 2:
            Buffer_strcpy(buffer, " ", andThenContinuation);
        case 3:
            Node_toString(binaryNode->right, buffer, andThenContinuation);
        case 4:
            Buffer_strcpy(buffer, ")", andThenContinuation);
        case 5:
            Continuation_success(continuation, buffer->data + offset);
    }
}
NodeVTable binaryNodeVTable = (NodeVTable) {
.toString = BinaryNode_toString,
};
/* Initialize node as a binary node over the given children (stored by pointer). */
void BinaryNode_init(BinaryNode *node, Node *left, Node *right) {
    *node = (BinaryNode) {
        .super = { .vptr = &binaryNodeVTable },
        .left = left,
        .right = right,
    };
}
/*
 * Demo driver: builds a small tree of leaf and binary nodes, renders it into a 1024-byte stack
 * buffer via BinaryNode_toString, and prints the result or "failure".
 */
int main(int argc, char **argv) {
LeafNode a, b, c;
LeafNode_init(&a, "a");
LeafNode_init(&b, "b");
LeafNode_init(&c, "c");
BinaryNode root;
// root starts with `a` as both children; its right child is replaced a few lines below.
BinaryNode_init(&root, (Node *) &a, (Node *) &a);
BinaryNode right;
BinaryNode_init(&right, (Node *) &b, (Node *) &c);
root.right = (Node *) &right;
char data[1024];
Buffer buffer = (Buffer) {.data = data, .offset = 0};
buffer.capacity = sizeof(data);
jmp_buf reference;
AndThenContinuation continuation;
// NOTE(review): `result` is presumably filled in through the pointer registered below before
// control comes back to the setjmp; it is never assigned directly in this function -- confirm.
char *result;
AndThenContinuation_init(&continuation, &reference, (void **) &result);
switch (setjmp(reference)) {
case -1:
// Error reported through the continuation.
fprintf(stderr, "failure\n");
return 1;
case 0:
// First pass through setjmp: start the computation (no break; see the NOTE above).
BinaryNode_toString((Node *) &root, &buffer, (Continuation *) &continuation);
case 1:
printf("success: %s\n", result);
}
return 0;
}
Really, I just want to know more about this style--what keywords should I be looking up? Is this style ever actually used?
Just to put my comment in an answer, here are a few thoughts. The first and foremost point, in my opinion, is that you are working in a procedural programming language where jumping is frowned upon and memory management is a known pitfall. As such, it is probably best to go with a better-known and much simpler approach, which will be easily readable to your fellow coders:
if(foo() || bar() || anotherFunctions())
return -1;
If you need to return different error codes then yes, I would use multiple ifs.
Regarding answering the question directly, my second point is that this is not very practical. You are implementing (quite cleverly, I might add) a basic C++-style class system along with something that almost looks like an exception system, albeit a basic one. The problem is, you rely heavily on the user of the framework to do a lot of management on their own - setting the jumps, initializing all the classes and using them correctly. It may be justified in the general class, but here you are implementing something not "native" to the language (and foreign to many of its users). The fact that a "class" unrelated to your exception handling (the tree) needs to reference your Continuation directly is a red flag. A major improvement would probably be to provide a try function, such that the user just uses
if(try(f1, f2, f3, onError)) return -1;
Which would wrap all the usage of your structs, making them invisible, though still not disconnecting your continuation from the tree. Of course, this is getting quite close to that regular if above, and if you do it properly, you have a lot of memory management to do - threads, signals, what is supported? Can you make sure you never leak?
My final point is: don't reinvent the wheel. If you want a try-except system, switch to a different language or, if you must stay in C, use a preexisting library (I see exception4c is high on Google through SO, never used it though). If C is the tool of choice, return values, argument return values, and signal handlers would be my goto (pun intended?).
I would avoid setjmp/longjmp:
They make resource management hard.
Usage is uncommon, which makes code harder to understand and maintain.
For your particular example, you could avoid setjmp/longjmp with a state machine:
/* States of the toString state machine; `error` is the zero value. */
typedef enum {
    error       = 0,
    leftParen   = 1,
    leftAction  = 2,
    space       = 3,
    rightAction = 4,
    rightParen  = 5,
    done        = 6
} State;
/*
 * State-machine sketch of BinaryNode_toString (the `...` stands for setup omitted by the author).
 * Each step stores the State returned by its helper into `state`: Buffer_strcpy returns the
 * nextState it was given on success and `error` when the text does not fit.
 * Returns NULL on error, otherwise a pointer into buffer->data at `offset`.
 * NOTE(review): `binaryNode` and `offset` are presumably set up in the elided part, and
 * Node_toString is presumably meant to return a State here as well -- confirm.
 */
void* BinaryNode_toString(Node *node, Buffer *buffer) {
...
State state = leftParen;
while (true) {
switch (state) {
case error:
// error branch
return NULL;
case leftParen:
// action 0
state = Buffer_strcpy(buffer, "(", leftAction);
break;
case leftAction:
// left child
state = Node_toString(binaryNode->left, buffer, space);
break;
case space:
// separator between the two children
state = Buffer_strcpy(buffer, " ", rightAction);
break;
case rightAction:
// right child
state = Node_toString(binaryNode->right, buffer, rightParen);
break;
case rightParen:
// closing parenthesis
state = Buffer_strcpy(buffer, ")", done);
break;
case done:
// every step succeeded
return buffer->data + offset;
}
}
}
/*
 * Append the NUL-terminated string at src to buffer.
 *
 * The terminator is copied too, but the offset is advanced only past the text, so the next
 * append overwrites it. Returns nextState on success, or error (leaving buffer untouched) when
 * the string plus terminator would exceed buffer->capacity.
 */
State Buffer_strcpy(Buffer *buffer, const void *src, State nextState) {
    const size_t bytes = strlen(src) + 1;  /* text plus its terminating NUL */

    if (!(buffer->offset + bytes > buffer->capacity)) {
        memcpy(buffer->data + buffer->offset, src, bytes);
        buffer->offset += bytes - 1;       /* leave the NUL outside the counted region */
        return nextState;
    }
    return error;
}
although personally I would just go with if checks with goto for error-handling, which is much more idiomatic in C:
/*
 * Sketch of the same function using plain return-value checks: each step that reports failure
 * jumps to the shared `fail` label. The `...` lines are parts the author left out.
 */
void* BinaryNode_toString(Node *node, Buffer *buffer) {
...
if (!Buffer_strcpy(...)) goto fail;
if (!Node_toString(...)) goto fail;
if (!Buffer_strcpy(...)) goto fail;
...
fail:
// Unconditionally free any allocated resources.
...
}```

C - Populate a generic struct inside a function without malloc

I'm trying to build a generic function that can populate a struct without any dynamic memory allocation.
The following code is a naive example of what I'm trying to do.
This code will not compile as incomplete type 'void' is not assignable.
Please note that this is a toy example to highlight my problems. I don't really want to convert colours; I just want to highlight that the structures will be different in data types and size.
#include <stdio.h>
/* Colour as three integer channels. */
typedef struct {
    int r, g, b;
} rgb_t;

/* Colour as four float channels. */
typedef struct {
    float c, m, y, k;
} cmyk_t;

/* Selects which of the two structures a conversion should fill in. */
typedef enum {
    RGB = 0,
    CMYK = 1
} color_t;
/*
 * Asker's attempt: build the requested colour structure in a local `temp` and copy it to the
 * caller's storage behind `out`, choosing the structure type from colorType.
 *
 * NOTE: this version does not compile. The two lines marked [!] assign through a dereferenced
 * void pointer, and the incomplete type 'void' is not assignable.
 */
void convert_hex_to_color(long hex, color_t colorType, void* const out) {
if (colorType == RGB) {
rgb_t temp = { 0 };
// Insert some conversion math here....
temp.r = 1;
temp.g = 2;
temp.b = 3;
*out = temp; //< [!]
} else
if (colorType == CMYK) {
cmyk_t temp = { 0 };
// Insert some conversion math here....
temp.c = 1.0;
temp.m = 2.0;
temp.y = 3.0;
temp.k = 4.0;
*out = temp; //< [!]
}
}
/* Converts one hex value into an rgb_t living on the stack and prints its channels. */
int main(void) {
    /* Given */
    const long hex = 348576;
    rgb_t mydata = { 0 };

    /* When */
    convert_hex_to_color(hex, RGB, (void *) &mydata);

    /* Then */
    printf("RGB = %i,%i,%i\r\n", mydata.r, mydata.g, mydata.b);
    return 0;
}
For some additional context, I'm using C11 on an embedded system target.
What is the best[1] way to do this? Macro? Union?
Regards,
Gabriel
[1] I would define "best" as a good compromise between readability and safety.
The reason for the error is it is invalid to store via a void pointer: the compiler does not know what to store. You could cast the pointer as *(rgb_t *)out = temp; or *(cmyk_t *)out = temp;
Alternately, you could define temp as a pointer to the appropriate structure type and initialize it directly from out, without the cast that is not needed in C:
/*
 * Fill the caller-provided structure behind `out` in place.
 *
 * colorType says what `out` really points to: an rgb_t for RGB, a cmyk_t for CMYK. Any other
 * value leaves `out` untouched. `hex` is not used yet (the conversion math is a placeholder).
 */
void convert_hex_to_color(long hex, color_t colorType, void *out) {
    switch (colorType) {
    case RGB: {
        rgb_t *rgb = out;  /* void * converts implicitly in C */
        /* Insert some conversion math here.... */
        rgb->r = 1;
        rgb->g = 2;
        rgb->b = 3;
        break;
    }
    case CMYK: {
        cmyk_t *cmyk = out;
        /* Insert some conversion math here.... */
        cmyk->c = 1.0;
        cmyk->m = 2.0;
        cmyk->y = 3.0;
        cmyk->k = 4.0;
        break;
    }
    default:
        break;
    }
}
Note that the cast is not needed in C:
/* Same driver as before, passing &mydata without a cast. */
int main(void) {
    /* Given */
    const long hex = 348576;
    rgb_t mydata = { 0 };

    /* When */
    convert_hex_to_color(hex, RGB, &mydata);

    /* Then */
    printf("RGB = %i,%i,%i\r\n", mydata.r, mydata.g, mydata.b);
    return 0;
}
rgb_t temp = {0};
So that declares a variable on the stack of type rgb_t. So far so good, though you don't need that 0.
*out = temp;
Here is your problem: in C you can only copy memory of the same type. Ever. This has nothing to do with malloc, as your title suggests, this is just the basic language specification. Sure, some types provide implicit casts, but void* is not one of them.
So if you're copying a structure (rgb_t on the right side), the destination has to be of the same type. So change the line to this:
*(rgb_t *)out = temp;
The "best" way is not to mix unrelated structures in the same function, or in the same memory area for that matter. That's just messy design.
If you need to keep a uniform API for two different forms of data, then a typesafe function-like macro might be one idea. You can fake such a macro to have a syntax similar to passing the data by pointer:
void convert_hex_to_color(long hex, type* data)
But then use C11 _Generic to actually determine the correct type to use, rather than using dangerous void pointers. Since you can't pass parameters "by reference" to macros, you'd have to sneak in a variable assignment in there. Example:
#include <stdio.h>
/* Colour as three integer channels. */
typedef struct {
    int r, g, b;
} rgb_t;

/* Colour as four float channels. */
typedef struct {
    float c, m, y, k;
} cmyk_t;
/* Documentation-only declaration; it precedes the #define, so it is not macro-expanded. */
void convert_hex_to_color(long hex, void* data);
/*
Pretty prototype just for code documentation purposes.
Never actually defined or called - the actual macro will "mock" this function.
*/
/*
 * Type-generic replacement for the function above. `output` must be a pointer to an rgb_t or a
 * cmyk_t: _Generic selects the compound literal matching the type of *(output) and the macro
 * assigns it to *(output). Any other pointee type has no matching association.
 * The `hex` argument does not appear in the expansion, so it is currently ignored.
 */
#define convert_hex_to_color(hex, output) ( *(output) = _Generic(*(output), \
rgb_t: (rgb_t){ .r=1, .g=2, .b=3 }, \
cmyk_t: (cmyk_t){ .c=1.0, .m=2.0, .y=3.0, .k=4.0 } ) )
/* Runs the type-generic conversion once per colour type and prints both results. */
int main(void) {
    /* Given */
    const long hex = 348576;
    rgb_t myrgb = { 0 };
    cmyk_t mycmyk = { 0 };

    /* When: the macro picks the conversion from the pointee type of its second argument */
    convert_hex_to_color(hex, &myrgb);
    convert_hex_to_color(hex, &mycmyk);

    /* Then */
    printf("RGB = %i,%i,%i\r\n", myrgb.r, myrgb.g, myrgb.b);
    printf("CMYK = %f,%f,%f,%f\r\n", mycmyk.c, mycmyk.m, mycmyk.y, mycmyk.k);
    return 0;
}
Output:
RGB = 1,2,3
CMYK = 1.000000,2.000000,3.000000,4.000000
Just be aware that the _Generic support for type qualifiers (const etc) was shaky in C11 - some C11 compilers treated const rgb_t different from rgb_t, others treated them the same. This was one of the "bug fixes" in C17, so use C17 if available.
Frame challenge: It seems like you want to perform a different operation depending on the type that you pass into this function. Instead of using an enum to tell it what type you're passing in, and branching based on that enum, make use of C11's _Generic to handle that, and you don't even need to explicitly tell it what the type is on each call:
#include <stdio.h>
/* Colour as three integer channels. */
typedef struct {
    int r;
    int g;
    int b;
} rgb_t;

/* Colour as four float channels. */
typedef struct {
    float c;
    float m;
    float y;
    float k;
} cmyk_t;

/*
 * Fill *out with the RGB conversion of hex (placeholder values for now).
 *
 * Declared `static inline`: a plain `inline` definition does not provide an external definition
 * in C, so any call the compiler chooses not to inline would fail to link.
 */
static inline void convert_hex_to_color_rgb(long hex, rgb_t *const out) {
    (void) hex; // or whatever you're planning to do with 'hex'
    out->r = 1;
    out->g = 2;
    out->b = 3;
}

/* Fill *out with the CMYK conversion of hex (placeholder values for now). */
static inline void convert_hex_to_color_cmyk(long hex, cmyk_t *const out) {
    (void) hex; // or whatever you're planning to do with 'hex'
    out->c = 1.0;
    out->m = 2.0;
    out->y = 3.0;
    out->k = 4.0;
}

/*
 * Type-generic front end: picks the converter from the static type of `out` (rgb_t * or
 * cmyk_t *) and calls it. Only the function designator is selected inside _Generic, so no casts
 * are needed to make the unselected association type-check.
 */
#define convert_hex_to_color(hex, out) _Generic((out), \
    rgb_t *:  convert_hex_to_color_rgb,                 \
    cmyk_t *: convert_hex_to_color_cmyk                 \
    )((hex), (out))
/* Calls the type-generic converter for each colour type and prints the results. */
int main(void) {
    /* Given */
    const long hex = 348576;
    rgb_t rgb = { 0 };
    cmyk_t cmyk = { 0 };

    /* When */
    convert_hex_to_color(hex, &rgb);
    convert_hex_to_color(hex, &cmyk);

    /* Then */
    printf("RGB = %i,%i,%i\r\n", rgb.r, rgb.g, rgb.b);
    printf("CMYK = %f,%f,%f,%f\r\n", cmyk.c, cmyk.m, cmyk.y, cmyk.k);
    return 0;
}

"dynamic array of static arrays"

How do you specify a dynamic array of static arrays in C?
I want to make a struct holding two dynamic arrays of static arrays.
/*
 * Asker's intended layout: a dynamic list of 3-double vertices and a dynamic list of 3-int
 * faces. NOTE: the member declarations below are not valid C syntax; a pointer to an array of
 * three doubles is written `double (*vertices)[3];`.
 */
struct indexed_face_set {
double * [3] vertices;
int * [3] faces;
};
This should hold a dynamic list of vertices, which are each 3 doubles, and a dynamic list of faces, which are each 3 ints.
The syntax is, well, C's approach to declarations is not the cleanest and C++ inherited that...
double (*vertices)[3];
That declaration means that vertices is a pointer to double [3] objects. Note that the parentheses are needed, otherwise (as in double *vertices[3]) it would mean an array of 3 double*.
After some time you end up getting used to the inverted placement of parentheses in such declarations...
For the specific case of a structure containing two arrays each of dimension 3, it would be simpler to make the arrays a part of the structure, rather than dynamically allocating them separately:
/* Variant with both fixed-size arrays stored directly inside the structure. */
struct indexed_face_set {
    double vertices[3];
    int    faces[3];
};
However, there certainly could be cases where it makes sense to handle dynamic array allocation. In that case, you need a pointer to an array in the structure (and not an array of pointers). So, you would need to write:
/* Variant holding pointers to arrays of three elements (not arrays of pointers). */
struct indexed_face_set {
    double (*vertices)[3];  /* points at one or more double[3] objects */
    int    (*faces)[3];     /* points at one or more int[3] objects */
};
To allocate a complete struct indexed_face_set, you need to use something like new_indexed_face_set() and to free one you need to use something like destroy_indexed_face_set():
/*
 * Allocate an indexed_face_set together with one double[3] and one int[3].
 *
 * Returns the new set, or a null pointer if any of the three allocations fails (in which case
 * everything already allocated is released). Free the result with destroy_indexed_face_set().
 */
struct indexed_face_set *new_indexed_face_set(void)
{
    struct indexed_face_set *set = malloc(sizeof(*set));
    if (set == NULL) {
        return NULL;
    }

    set->vertices = malloc(sizeof(*set->vertices));
    set->faces = malloc(sizeof(*set->faces));
    if (set->vertices != NULL && set->faces != NULL) {
        return set;
    }

    /* At least one array is missing: undo the partial construction. */
    free(set->vertices);
    free(set->faces);
    free(set);
    return NULL;
}
/* Release a set and both of its arrays; a null pointer is accepted and ignored. */
void destroy_indexed_face_set(struct indexed_face_set *ifs)
{
    if (ifs == NULL) {
        return;
    }
    free(ifs->vertices);
    free(ifs->faces);
    free(ifs);
}
Then you can use it like this:
void play_with_ifs(void)
{
struct indexed_face_set *ifs = new_indexed_face_set();
if (ifs != 0)
{
(*ifs->vertices)[0] = 3.14159;
(*ifs->vertices)[1] = 2.71813;
(*ifs->vertices)[2] = 1.61803;
(*ifs->faces)[0] = 31;
(*ifs->faces)[1] = 30;
(*ifs->faces)[2] = 29;
do_something_fancy(ifs);
destroy_indexed_face_set(ifs);
}
}
Note that the notation using pointers to arrays is moderately messy; one reason why people do not often use them.
You could use this fragment as the body of a header:
#ifndef DASS_H_INCLUDED
#define DASS_H_INCLUDED

/* Opaque to users of this header: only pointers to the structure are passed around. */
struct indexed_face_set;

/* Construction and destruction */
struct indexed_face_set *new_indexed_face_set(void);
void destroy_indexed_face_set(struct indexed_face_set *ifs);

/* Operations */
void do_something_fancy(struct indexed_face_set *ifs);
void play_with_ifs(void);

#endif /* DASS_H_INCLUDED */
It doesn't need any extra headers included; it does not need the details of the structure definition for these functions. You'd wrap it in suitable header guards.
Because the code above is a bit messy when it comes to using the arrays, most people would use a simpler notation. The header above can be left unchanged, but the code could be changed to:
/* Simpler variant: plain element pointers instead of pointers to arrays. */
struct indexed_face_set {
    double *vertices;  /* first element of the vertex coordinates */
    int    *faces;     /* first element of the face indices */
};
/*
 * Allocate an indexed_face_set with room for 3 doubles and 3 ints.
 *
 * Returns the new set, or a null pointer if any of the three allocations fails (in which case
 * everything already allocated is released). Free the result with destroy_indexed_face_set().
 */
struct indexed_face_set *new_indexed_face_set(void)
{
    struct indexed_face_set *set = malloc(sizeof(*set));
    if (set == NULL) {
        return NULL;
    }

    set->vertices = malloc(3 * sizeof(*set->vertices));
    set->faces = malloc(3 * sizeof(*set->faces));
    if (set->vertices == NULL || set->faces == NULL) {
        /* Undo the partial construction; free() accepts null pointers. */
        free(set->vertices);
        free(set->faces);
        free(set);
        return NULL;
    }
    return set;
}
/* Release a set and both of its arrays; a null pointer is accepted and ignored. */
void destroy_indexed_face_set(struct indexed_face_set *ifs)
{
    if (ifs == NULL) {
        return;
    }
    free(ifs->vertices);
    free(ifs->faces);
    free(ifs);
}
void play_with_ifs(void)
{
struct indexed_face_set *ifs = new_indexed_face_set();
if (ifs != 0)
{
ifs->vertices[0] = 3.14159;
ifs->vertices[1] = 2.71813;
ifs->vertices[2] = 1.61803;
ifs->faces[0] = 31;
ifs->faces[1] = 30;
ifs->faces[2] = 29;
do_something_fancy(ifs);
destroy_indexed_face_set(ifs);
}
}
This is much simpler to understand and use and would generally be regarded as more idiomatic C.
Since the size of each array is fixed, there's no particular need to record the size in the structure. If the sizes varied at runtime, and especially if some indexed face sets had, say, 8 vertices and 6 faces (cuboid?), then you might well want to record the sizes of the arrays in the structure. You'd also specify the number of vertices and number of faces in the call to new_indexed_face_set().

Resources