Context:
I am experimenting with functional programming patterns in C90.
Goal:
This is what I'm trying to achieve in ISO C90:
struct mut_arr tmp = {0};
/* ... */
struct arr const res_c99 = {tmp};
Initializing a const struct member of type struct mut_arr with an lvalue (tmp).
#include <stdio.h>
/* Number of elements carried by every array value. */
enum { MUT_ARR_LEN = 4UL };

/* Writable byte buffer; serves as scratch space while a result is built. */
struct mut_arr {
    unsigned char bytes[MUT_ARR_LEN * sizeof(unsigned char const)];
};

/* Read-only array value: wraps a mut_arr behind a const-qualified member. */
struct arr {
    struct mut_arr const byte_arr;
};
static struct arr map(struct arr const* const a,
unsigned char (*const op)(unsigned char const))
{
struct mut_arr tmp = {0};
size_t i = 0UL;
for (; i < sizeof(tmp.bytes); ++i) {
tmp.bytes[i] = op(a->byte_arr.bytes[i]);
}
struct arr const res_c99 = {tmp};
return res_c99;
}
/* Element operation: successor of `el`; 255 wraps around to 0. */
static unsigned char op_add_one(unsigned char const el)
{
    unsigned char const next = (unsigned char)(el + 1);
    return next;
}
/*
 * Element operation with a side effect: print `el` in decimal (no
 * separator, no newline) to stdout. Always returns 0 so it can be passed
 * to map(), which expects an operation that yields a byte.
 */
static unsigned char op_print(unsigned char const el)
{
    /* In a variadic call `el` is promoted to int; cast so that the
       argument really has the unsigned int type that %u requires. */
    printf("%u", (unsigned int)el);
    return 0U;
}
int main() {
struct arr const a1 = {{{1, 2, 3, 4}}};
struct arr const a2 = map(&a1, &op_add_one);
map(&a2, &op_print);
return 0;
}
This is what I tried in C90:
#include <stdio.h>
#include <string.h>
/* Element count shared by all array values. */
enum { MUT_ARR_LEN = 4UL };

/* Mutable storage for MUT_ARR_LEN bytes. */
struct mut_arr {
    unsigned char bytes[MUT_ARR_LEN * sizeof(unsigned char const)];
};

/* Immutable array value: its only member is a const mut_arr. */
struct arr {
    struct mut_arr const byte_arr;
};
/*
 * C90 attempt at map(): apply `op` to each byte of `a` and return the
 * result by value.
 *
 * `res` is defined const and zero-initialised. `res_mut_view` is a
 * non-const pointer obtained by casting &res, and memcpy() writes the
 * mapped bytes through it before `res` is returned.
 *
 * NOTE(review): writing through a cast pointer into an object that was
 * defined const is undefined behaviour in C, even where it appears to work.
 */
struct arr map(struct arr const* const a,
unsigned char (*const op)(unsigned char const))
{
struct arr const res = {0};
/* Writable "alternate view" of the const result object. */
unsigned char(*const res_mut_view)[sizeof(res.byte_arr.bytes)] =
(unsigned char(*const)[sizeof(res.byte_arr.bytes)]) & res;
struct mut_arr tmp = {0};
size_t i = 0UL;
/* Build the mapped bytes in a writable scratch buffer first. */
for (; i < sizeof(tmp.bytes); ++i) {
tmp.bytes[i] = op(a->byte_arr.bytes[i]);
}
/* Copy the scratch bytes over the const result via the writable view. */
memcpy(res_mut_view, &tmp.bytes[0], sizeof(tmp.bytes));
return res;
}
/* Element operation: `el` plus one, reduced modulo 256. */
unsigned char op_add_one(unsigned char const el)
{
    return (unsigned char)(el + 1U);
}
/*
 * Element operation with a side effect: write `el` in decimal to stdout
 * (no separator, no newline) and return 0.
 */
unsigned char op_print(unsigned char const el) {
    /* %u requires unsigned int, but `el` would be promoted to int. */
    printf("%u", (unsigned int)el);
    return 0U;
}
int main() {
struct arr const a1 = {{{1, 2, 3, 4}}};
struct arr const a2 = map(&a1, &op_add_one);
map(&a2, &op_print);
return 0;
}
All I do is to create an "alternate view" (making it essentially writable). Hence, I cast the returned address to unsigned char(*const)[sizeof(res.byte_arr.bytes)].
Then, I use memcpy, and copy the contents of the tmp to res.
I also tried to use the scoping mechanism to circumvent initializing in the beginning.
But it does not help, since there cannot be a runtime evaluation.
This works, but it is not anything like the C99 solution above.
Is there perhaps a more elegant way to pull this off?
PS: Preferably, the solution should be as portable as possible, too. (No heap allocations, only static allocations. It should remain thread-safe. These programs above seem to be, as I only use stack allocation.)
Union it.
#include <stdio.h>
#include <string.h>
/* Element count shared by all array values. */
enum { MUT_ARR_LEN = 4UL };

/* Mutable storage for MUT_ARR_LEN bytes. */
struct mut_arr {
    unsigned char bytes[MUT_ARR_LEN * sizeof(unsigned char)];
};

/* Immutable array value: a mut_arr behind a const-qualified member. */
struct arr {
    const struct mut_arr byte_arr;
};
struct arr map(const struct arr *a, unsigned char (*op)(unsigned char)) {
union {
struct mut_arr tmp;
struct arr arr;
} u;
size_t i = 0;
for (; i < sizeof(u.tmp.bytes); ++i) {
u.tmp.bytes[i] = op(a->byte_arr.bytes[i]);
}
return u.arr;
}
/* Element operation: increment `el`; 255 wraps around to 0. */
unsigned char op_add_one(unsigned char el) {
    ++el;
    return el;
}
/*
 * Element operation with a side effect: write `el` in decimal to stdout
 * (no separator, no newline) and return 0.
 */
unsigned char op_print(unsigned char el) {
    /* %u requires unsigned int, but `el` would be promoted to int. */
    printf("%u", (unsigned int)el);
    return 0U;
}
int main() {
const struct arr a1 = {{{1, 2, 3, 4}}};
const struct arr a2 = map(&a1, &op_add_one);
map(&a2, &op_print);
return 0;
}
Let's quote the relevant passages of the standard, taken from https://port70.net/~nsz/c/c89/c89-draft.html .
One special guarantee is made in order to simplify the use of unions: If a union contains several structures that share a common initial sequence, and if the union object currently contains one of these structures, it is permitted to inspect the common initial part of any of them. Two structures share a common initial sequence if corresponding members have compatible types for a sequence of one or more initial members.
Two types have compatible type if their types are the same.
For two qualified types to be compatible, both shall have the identically qualified version of a compatible type;
The idea is that "common initial sequence" of mut_arr and arr is unsigned char [sizeof(unsigned char) * MUT_ARR_LEN]; so you can access one using the other.
However, as I read it now, the standard does not say whether the "corresponding members" of a common initial sequence may themselves be nested struct members. So, to be strictly standard compliant, you would write:
/*
 * Apply `op` to each byte of `a` and return the results as a struct arr.
 *
 * `struct mutmut_arr` mirrors `struct arr` member for member, but without
 * the const qualifier, so the two union members have the same nesting
 * depth. The result is written through `u.tmp` and read back as `u.arr`.
 */
struct arr map(const struct arr *a, unsigned char (*op)(unsigned char)) {
    struct mutmut_arr {
        struct mut_arr byte_arr;
    };
    union {
        struct mutmut_arr tmp;
        struct arr arr;
    } u;
    size_t i = 0;
    /* The byte array lives one level down, in u.tmp.byte_arr; struct
       mutmut_arr has no `bytes` member of its own. */
    for (; i < sizeof(u.tmp.byte_arr.bytes); ++i) {
        u.tmp.byte_arr.bytes[i] = op(a->byte_arr.bytes[i]);
    }
    return u.arr;
}
#subjective I do want to note two things.
The placement of const type qualifier in your code is very confusing. It's typical in C to write const <type> not <type> const. It's typical to align * to the right with space on the left. I was not able to read your code efficiently at all. I removed almost all const from the code above.
Creating an interface such as the one presented will be a pain with no great benefit, and with a lot of edge cases where undefined behavior lurks around the corner. "Trust the programmer" is one of the principles of the C programming language. Do not prevent the programmer from doing what has to be done (initializing a structure member). I would advise making the member mutable, keeping a single structure definition, and calling it a day. const-qualified structure members are usually just hard to deal with, with no big benefit.
My answer might sound outrageous at first glance. It is
STOP WHAT YOU ARE DOING, NOW!
I will take my time to explain and give you a glimpse into your future (which is dim, if you pursue this idea) and try to convince you. But the gist of my answer is the bold line above.
Your prototype omits crucial parts needed for a lasting solution to your "functional programming in C" approach. For example, you only have arrays of bytes (unsigned char). But for a "real" solution for "real" programmers, you need to consider different types. If you go to Hoogle (Haskell's online type and function search engine), you will notice that fmap, which is the functional feature you are trying to achieve in C, is defined as:
fmap :: Functor f => (a -> b) -> f a -> f b
This means the mapping is not always from type a to type a. It's a functor thingy (note the Functor constraint in the signature) that you are trying to offer your fellow C programmers. So an array of element type a needs to be mapped to an array of element type b. Hence, your solution needs to offer more than just arrays of bytes.
In C, arrays can reside in different types of memory and we cannot hide this very well. (In real functional languages, memory management is abstracted away for the most part and you just do not care. But in C, you must care. The user of your library must care, and you need to allow them to dutifully care.) Arrays can be global, on the stack, on the heap, in shared memory, ... and you need to offer a solution that allows all of that. Otherwise, it will always just be a toy, propagating the illusion that "it is possible and useful".
So, just allowing for arrays of different, custom types (someone will want arrays of arrays of a type as well, mind you!) and being aware of memory management, what could the header file of your next evolution look like? Here is what I came up with:
#ifndef __IMMUTABLE_ARRAY_H
#define __IMMUTABLE_ARRAY_H
#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
// lacking namespaces or similar facilities in C, we use
// the prefix IA (Immutable Array) in front of all the stuff
// declared in this header.
// Wherever you see a naked `int`, think "bool".
// 0 -> false, 1 -> true.
// We do not like stdbool.h because sometimes trouble
// ensues in mixed C/C++ code bases on some targets, where
// sizeof(C-bool) != sizeof(C++-bool) o.O. So we cannot use
// C-bool in headers...
// We need storage classes!
// There are arrays on heap, static (global arrays),
// automatic arrays (on stack, maybe by using alloca),
// arrays in shared memory, ....
// For those different locations, we need to be able to
// perform different actions, e.g. for cleanup.
// IAStorageClass_t defines the behavior for a specific
// storage class.
// There is also the case of an array of arrays to consider...
// where we would need to clean up each member of the array
// once the array goes out of scope.
// Forward declarations so the hook signatures below can refer to both types.
struct IAArray_tag;
typedef struct IAArray_tag IAArray_t;
typedef struct IAStorageClass_tag IAStorageClass_t;

// Hook signatures of a storage class. The int-returning allocator follows
// this header's "int means bool" convention (0 -> false, 1 -> true).
typedef int (*IAArrayAllocator) (IAStorageClass_t* sclass,
                                 size_t elementSize,
                                 size_t capacity,
                                 void* maybeStorage,
                                 IAArray_t* target);
typedef void (*IAArrayDeleter) (IAArray_t* arr);
typedef void (*IAArrayElementDeleter) (IAArray_t* arr);
typedef int64_t (*IAArrayAddRef) (IAArray_t* arr);
typedef int64_t (*IAArrayRelease) (IAArray_t* arr);

// Set of hooks describing one storage class.
// The IAStorageClass_t typedef already exists (see the forward declaration
// above), so only the struct body is defined here; repeating the typedef
// is a redefinition error before C11.
struct IAStorageClass_tag {
    IAArrayAllocator allocator;
    IAArrayDeleter deleter;
    IAArrayElementDeleter elementDeleter;
    IAArrayAddRef addReffer;
    IAArrayRelease releaser;
};
// Identifiers of the built-in storage classes. Values from
// IA_CUSTOM_CLASSES_BEGIN upwards are outside the built-in range.
typedef enum IAStorageClassID_tag {
    IA_HEAP_ARRAY = 0,
    IA_STACK_ARRAY,                 // == 1
    IA_GLOBAL_ARRAY,                // == 2
    IA_CUSTOM_CLASSES_BEGIN = 100
} IAStorageClassID_t;
// creates the default storage classes (for heap and automatic).
// Declared with (void): an empty parameter list `()` is not a prototype in C
// and would let callers pass arbitrary arguments unchecked.
void IAInitialize(void);
void IATerminate(void);
// returns a custom and dedicated identifier of the storage class.
// The five arguments are the hooks that make up the new storage class.
int32_t
IARegisterStorageClass
(IAArrayAllocator allocator,
 IAArrayDeleter deleter,
 IAArrayElementDeleter elementDeleter,
 IAArrayAddRef addReffer,
 IAArrayRelease releaser);
// Array descriptor. How each field is maintained is up to the implementation
// file, which is not part of this header.
struct IAArray_tag {
const IAStorageClass_t* storageClass; // hooks of the storage class in use (presumably; confirm in the .c)
int64_t refCount; // reference counter; see IAArrayAddReference / IAArrayReleaseReference
size_t elementSize; // Depends on the type you want to store
size_t capacity; // NOTE(review): presumably the number of element slots available -- confirm
size_t length; // NOTE(review): presumably the number of elements in use -- confirm
void* data; // untyped pointer to the element storage
};
// to make sure, uninitialized array variables are properly
// initialized to a harmless state.
// Declared with (void) so that it is a real prototype taking no arguments.
IAArray_t IAInitInstance(void);
// allows to check if we ran into some uninitialized instance.
// In C++, this would be like after default constructor.
// See IAInitInstance().
// Returns an int used as a bool (0 -> false, 1 -> true).
int IAIsArray(IAArray_t* arr);
// Creates an array in `target` for the storage class `storageClassID`.
// The int result is a bool (0 -> false, 1 -> true) per this header's
// convention.
// NOTE(review): `maybeStorage` is presumably caller-provided backing memory
// for storage classes that do not allocate themselves -- confirm.
int
IAArrayCreate
(int32_t storageClassID,
size_t elementSize, // the elementSize SHALL be padded to
// a system-acceptable alignment size.
size_t capacity,
size_t size,
void* maybeStorage,
IAArray_t* target);
// Callback used to initialize one element: receives the element's index and
// an untyped pointer to it; the int result is a bool.
typedef
int
(*IAInitializerWithIndex_t)
(size_t index,
void* elementPtr);
// Like IAArrayCreate, but takes an `initializer` callback instead of a
// `size` argument.
// NOTE(review): presumably the callback is invoked once per element --
// confirm against the implementation.
int
IAArrayCreateWithInitializer
(int32_t storageClassID,
size_t elementSize,
size_t capacity,
void* maybeStorage,
IAInitializerWithIndex_t initializer,
IAArray_t* target);
// Reference-counting entry points for an array instance.
IAArray_t* IAArrayAddReference(IAArray_t* arr);
void IAArrayReleaseReference(IAArray_t* arr);
// The one and only legal way to access elements within the array.
// Shortcutters, clever guys and other violators get hung, drawn
// and quartered!
// Returns a pointer to read-only element data. A top-level `const` on the
// returned pointer itself would have no effect (a return value is not an
// lvalue) and only triggers compiler warnings, so it is not written here.
const void * IAArrayAccess(IAArray_t* arr, size_t index);
// Callback type used by IAArrayMap: receives the element index plus untyped
// pointers to, and the sizes of, one source element and one target element.
typedef void (*IAValueMapping_t)
(size_t index,
void* sourceElementPtr,
size_t sourceElementSize,
void* targetElementPtr,
size_t targetElementSize);
// Accessors for the descriptor's size-related properties.
// NOTE(review): presumably these return the length, capacity and elementSize
// fields of IAArray_t respectively -- confirm in the implementation.
size_t IAArraySize(IAArray_t* arr);
size_t IAArrayCapacity(IAArray_t* arr);
size_t IAArrayElementSize(IAArray_t* arr);
// Because of reasons, we sometimes want to recycle
// an array and populate it with new values.
// This can only be referentially transparent and safe,
// if there are no other references to this array stored
// anywhere. i.e. if refcount == 1.
// If our app code passed the array around to other functions,
// some nasty ones might sneakily store themselves a pointer
// to an array and then the refcount > 1 and we cannot
// safely recycle the array instance.
// Then, we have to release it and create ourselves a new one.
// The int result is a bool (0 -> false, 1 -> true).
int IACanRecycleArray(IAArray_t* arr);
// Starship troopers reporter during human invasion
// of bug homeworld: "It is an ugly planet, a bug planet!"
// This is how we feel about C. Map needs some noisy extras,
// just because C does not allow to build new abstractions with
// types. Yes, we could send Erich Gamma our regards and pack
// all the noise into some IAArrayFactory * :)
//
// Takes six arguments: the mapping callback, the source array, the storage
// class ID and element size of the target, optional target storage, and the
// target descriptor. The int result is a bool.
int
IAArrayMap(IAValueMapping_t mapping,
IAArray_t* source,
int32_t targetStorageClassID,
size_t targetElementSize,
void* maybeTargetStorage,
IAArray_t* target);
#endif
Needless to say, that I did not bother to implement my cute immutable-array.h in my still empty immutable-array.c, yes?
But once we did, the joy would begin and we could write robust, functional C programs, yes? No! This is what well-written functional C application code using those arrays might look like:
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdatomic.h>
#include <math.h>
#include <assert.h>
#include "immutable-array.h"
// Result pair filled in by myFloor().
typedef struct F64FloorResult_tag {
double div; // floor(number)
double rem; // number - div
} F64FloorResult_t;
// Splits `number` into its floor (`div`) and the remaining fraction (`rem`).
// Does nothing when `result` is NULL.
void myFloor(double number, F64FloorResult_t* result) {
    double whole;

    if (NULL == result) {
        return;
    }
    whole = floor(number);
    result->div = whole;
    result->rem = number - whole;
}
// Element initializer: stores a pseudo-random double in [0, 1] into the
// element slot. Returns 1 on success and 0 when `element` is NULL.
//
// The slot is taken as void* so that this function's type matches
// IAInitializerWithIndex_t exactly. Calling a function through a pointer of
// an incompatible type (double* vs. void* parameter) is undefined behaviour,
// even with a cast at the call site.
int randomDoubleInitializer(size_t index, void* element) {
    double* slot = element;

    (void)index; // every element gets an independent random value
    if (NULL != slot) {
        *slot = ((double)rand()) / (double)RAND_MAX;
        return 1;
    }
    return 0;
}
void
doubleToF64FloorMapping
(size_t index,
double* input,
size_t inputElementSize,
F64FloorResult_t *output,
size_t outputElementSize) {
assert(sizeof(double) == inputElementSize);
assert(sizeof(F64FloorResult_t) == outputElementSize);
assert(NULL != input);
assert(NULL != output);
myFloor(*input, output);
}
// Demo: fill a 20-element stack array with random doubles, map each one to
// its floor/remainder pair, and print source and result side by side.
int main(int argc, const char* argv[]) {
    (void)argc;
    (void)argv;
    IAInitialize();
    {
        double sourceData[20];
        IAArray_t source = IAInitInstance();
        if (IAArrayCreateWithInitializer
            ((IAStorageClassID_t)IA_STACK_ARRAY,
             sizeof(double),
             20,
             &sourceData[0],
             (IAInitializerWithIndex_t)randomDoubleInitializer,
             &source)) {
            IAArray_t result = IAInitInstance();
            F64FloorResult_t resultData[20];
            // IAArrayMap takes six arguments; the backing storage for the
            // target (resultData) must be passed before the descriptor.
            if (IAArrayMap
                ((IAValueMapping_t)doubleToF64FloorMapping,
                 &source,
                 (int32_t)IA_STACK_ARRAY,
                 sizeof(F64FloorResult_t),
                 &resultData[0],
                 &result)) {
                assert(IAArraySize(&source) == IAArraySize(&result));
                for (size_t index = 0;
                     index < IAArraySize(&source);
                     index++) {
                    const double* const ival =
                        (const double* const)IAArrayAccess(&source, index);
                    const F64FloorResult_t* const oval =
                        (const F64FloorResult_t* const)
                        IAArrayAccess(&result,index);
                    printf("(%g . #S(f64floorresult_t :div %g :rem %g))\n",
                           *ival, oval->div, oval->rem);
                }
                IAArrayReleaseReference(&result);
            }
            // Released on every path on which creation succeeded.
            IAArrayReleaseReference(&source);
        }
    }
    IATerminate();
    return 0;
}
I see already the knives coming out of the satchels of your colleagues if you try to impose such a monstrosity upon them. They will hate you, you will hate yourself. Eventually, you will hate that you ever had the idea to even try.
Especially, if in a more suitable language, the same code might look like this:
(map 'list #'(lambda (x) (multiple-value-list (floor x)))
(loop repeat 20
for x = (random 1.0)
collecting x))
I would like this to work, but it does not:
#include <stdio.h>
/* Pair of function pointers that emit() returns as a would-be closure. */
typedef struct closure_s {
void (*incrementer) (); /* intended to increment the captured int */
void (*emitter) (); /* intended to print the captured int */
} closure;
/*
 * Attempts to build a closure over `in` from two functions defined inside
 * emit() and to return pointers to them.
 *
 * NOTE(review): functions defined inside another function are a compiler
 * extension (GNU C), not standard C. Both nested functions refer to the
 * parameter `in`, which belongs to this call of emit(); the returned
 * pointers are used only after emit() has returned, so they presumably
 * refer to storage that is no longer valid -- confirm against the GCC
 * "Nested Functions" documentation.
 */
closure emit(int in) {
void incrementer() {
in++;
}
void emitter() {
printf("%d\n", in);
}
/* Compound literal holding the addresses of the two nested functions. */
return (closure) {
incrementer,
emitter
};
}
/*
 * Demo driver: create two would-be closures and call their increment and
 * emit functions in turn.
 *
 * The return type is spelled out because implicit int was removed from the
 * language in C99, and 0 is returned explicitly.
 */
int main(void) {
    closure test[] = {
        emit(10),
        emit(20)
    };
    test[0] . incrementer();
    test[1] . incrementer();
    test[0] . emitter();
    test[1] . emitter();
    return 0;
}
It actually does compile and does work for 1 instance ... but the second one fails. Any idea how to get closures in C?
It would be truly awesome!
Using FFCALL,
#include <callback.h>
#include <stdio.h>
/* Adds one to the int that `in` points to. */
static void incrementer_(int *in) {
    *in += 1;
}
/* Prints the int that `in` points to, followed by a newline. */
static void emitter_(int *in) {
    int const value = *in;

    printf("%d\n", value);
}
/*
 * Demo driver: creates two incrementer/emitter callback pairs, one per
 * variable, calls them, and releases them again.
 * NOTE(review): alloc_callback() presumably returns a callable that invokes
 * the given function with the given data pointer -- confirm against the
 * FFCALL callback documentation, including the expected callee signature.
 */
int main() {
int in1 = 10, in2 = 20;
/* One callback per (function, captured variable) pair. */
int (*incrementer1)() = alloc_callback(&incrementer_, &in1);
int (*emitter1)() = alloc_callback(&emitter_, &in1);
int (*incrementer2)() = alloc_callback(&incrementer_, &in2);
int (*emitter2)() = alloc_callback(&emitter_, &in2);
incrementer1();
incrementer2();
emitter1();
emitter2();
/* Every allocated callback is released exactly once. */
free_callback(incrementer1);
free_callback(incrementer2);
free_callback(emitter1);
free_callback(emitter2);
}
But usually in C you end up passing extra arguments around to fake closures.
Apple has a non-standard extension to C called blocks, which do work much like closures.
ANSI C has no support for closures or for nested functions. A workaround is to use a simple "struct".
A simple example: a closure that sums two numbers.
// Closure object: stores the first operand together with a pointer to the
// function that is called with the closure itself and the second operand.
typedef struct _closure{
int x; // captured first operand
char* (*call)(struct _closure *str, int y); // invoked as c->call(c, y)
} closure;
// Closure body: formats "<x> + <y> = <x+y>" using the captured x.
// Returns a newly allocated string that the caller must free(), or NULL if
// formatting or allocation fails.
char *
sumY(closure *_closure, int y) {
    int sum = _closure->x + y;
    // Measure first: three ints plus separators can need well over the
    // 20 bytes a fixed buffer would offer.
    int len = snprintf(NULL, 0, "%d + %d = %d", _closure->x, y, sum);
    char *msg;

    if (len < 0) {
        return NULL;
    }
    msg = calloc((size_t)len + 1, sizeof(char));
    if (msg == NULL) {
        return NULL;
    }
    snprintf(msg, (size_t)len + 1, "%d + %d = %d", _closure->x, y, sum);
    return msg;
}
// Creates a closure that adds `x` to whatever it is later called with.
// Returns a heap-allocated closure that the caller must free(), or NULL if
// the allocation fails.
closure *
sumX(int x) {
    closure *func = malloc(sizeof *func);

    if (func == NULL) {
        return NULL;
    }
    func->x = x;
    func->call = sumY;
    return func;
}
Usage:
// Demo: build a "sum by 10" closure and print its result for a few inputs.
// Every string returned by the closure is heap-allocated, so it is freed
// right after printing; the closure itself is freed at the end.
int main (int argv, char **argc)
{
    static const int addends[] = { 1, 3, 2, 4, 5 };
    size_t i;
    closure *sumBy10 = sumX(10);

    (void)argv;
    (void)argc;
    if (sumBy10 == NULL) {
        return 1;
    }
    for (i = 0; i < sizeof addends / sizeof addends[0]; ++i) {
        char *msg = sumBy10->call(sumBy10, addends[i]);
        if (msg != NULL) { // puts(NULL) would be undefined
            puts(msg);
            free(msg);
        }
    }
    free(sumBy10);
    return 0;
}
Result:
10 + 1 = 11
10 + 3 = 13
10 + 2 = 12
10 + 4 = 14
10 + 5 = 15
In C++11 the same can be achieved with a lambda expression.
#include <iostream>
// Demo: a lambda that captures x by value and prints "x + y = sum" for
// y = 1..5.
int main (int argv, char **argc)
{
    int x = 10;
    auto sumBy10 = [x] (int y) {
        const int total = x + y;
        std::cout << x << " + " << y << " = " << total << std::endl;
    };

    for (int y = 1; y <= 5; ++y) {
        sumBy10(y);
    }
}
The result, after compiling with the flag -std=c++11:
10 + 1 = 11
10 + 2 = 12
10 + 3 = 13
10 + 4 = 14
10 + 5 = 15
A Working Definition of a Closure with a JavaScript Example
A closure is a kind of object that contains a pointer or reference of some kind to a function to be executed, along with an instance of the data needed by the function.
An example in JavaScript from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Closures is
// Returns a function that adds the captured `x` to its argument.
function makeAdder(x) {
  // `adder` closes over x: the value stays available after makeAdder returns.
  function adder(y) {
    return x + y;
  }
  return adder;
}
which could then be used like:
var add5 = makeAdder(5); // create an adder function which adds 5 to its argument
console.log(add5(2)); // logs 7, i.e. 2 + 5
Some of the Obstacles to Overcome with C
The C programming language is a statically typed language, unlike JavaScript, nor does it have garbage collection, and some other features that make it easy to do closures in JavaScript or other languages with intrinsic support for closures.
One large obstacle for closures in Standard C is the lack of language support for the kind of construct in the JavaScript example in which the closure includes not only the function but also a copy of data that is captured when the closure is created, a way of saving state which can then be used when the closure is executed along with any additional arguments provided at the time the closure function is invoked.
However C does have some basic building blocks which can provide the tools for creating a kind of closure. Some of the difficulties are (1) memory management is the duty of the programmer, no garbage collection, (2) functions and data are separated, no classes or class type mechanics, (3) statically typed so no run time discovery of data types or data sizes, and (4) poor language facilities for capturing state data at the time the closure is created.
One thing that makes something of a closure facility possible with C is the void * pointer and using unsigned char as a kind of general purpose memory type which is then transformed into other types through casting.
An update with new approach
My original posted answer seems to have been helpful enough that people have upvoted it however it had a constraint or two that I didn't like.
Getting a notification of a recent upvote, I took a look at some of the other posted answers and realized that I could provide a second approach that would overcome the problem that bothered me.
A new approach that removes a problem of the original approach
The original approach required function arguments to be passed on the stack. This new approach eliminates that requirement. It also seems much cleaner. I'm keeping the original approach below.
The new approach uses a single struct, ClosureStruct, along with two functions to build the closure, makeClosure() and pushClosureArg().
This new approach also uses the variable argument functionality of stdarg.h to process the captured arguments in the closure data.
Using the following in a C source code file requires the following includes:
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <stdarg.h>
// Closure management block. makeClosure() allocates it together with the
// closure data area in one calloc() call, and `args` points just past it.
typedef struct {
void (*p)(); // pointer to the function of this closure
size_t sargs; // size of the memory area allocated for closure data
size_t cargs; // current memory area in use for closure data
unsigned char * args; // pointer to the allocated closure data area
} ClosureStruct;
// Allocates a closure for function `p` with room for `sargs` bytes of
// captured data. Returns the closure (release it with free()), or NULL if
// the requested size cannot be represented or the allocation fails.
void * makeClosure(void (*p)(), size_t sargs)
{
    ClosureStruct* cp;

    // header + data must not wrap around size_t, otherwise calloc() would
    // hand back a block smaller than what pushClosureArg() believes it has.
    if (sargs > (size_t)-1 - sizeof(ClosureStruct)) {
        return NULL;
    }

    // allocate the space for the closure management data and the closure data itself.
    // we do this with a single call to calloc() so that we have only one pointer to
    // manage.
    cp = calloc(1, sizeof(ClosureStruct) + sargs);
    if (cp) {
        cp->p = p;          // save a pointer to the function
        cp->sargs = sargs;  // save the total size of the memory allocated for closure data
        cp->cargs = 0;      // initialize the amount of memory used
        cp->args = (unsigned char *)(cp + 1);  // closure data is after closure management block
    }
    return cp;
}
// Appends a copy of the `sarg` bytes at `arg` to the closure's data area.
// The argument is silently dropped when `cp` or `arg` is NULL or when the
// remaining space is too small. Always returns `cp` so calls can be chained.
void * pushClosureArg(void* cp, size_t sarg, void* arg)
{
    ClosureStruct* p = cp;

    // cargs never exceeds sargs, so the subtraction cannot wrap; the form
    // `cargs + sarg <= sargs` could wrap for a huge sarg and wrongly pass.
    if (p && arg && sarg <= p->sargs - p->cargs) {
        // there is room in the closure area for this argument so make a copy
        // of the argument and remember our new end of memory.
        memcpy(p->args + p->cargs, arg, sarg);
        p->cargs += sarg;
    }
    return cp;
}
This code is then used similar to the following:
// example functions that we will use with closures
// funcadd() is a function that accepts a closure with two int arguments
// along with three additional int arguments.
// it is similar to the following function declaration:
// void funcadd(int x1, int x2, int a, int b, int c);
//
// Precondition: two ints were pushed into the closure, in that order.
void funcadd(ClosureStruct* cp, int a, int b, int c)
{
    int x1;
    int x2;

    // The captured values sit back to back in the closure data area, exactly
    // as pushClosureArg() copied them. They are read back with memcpy():
    // va_start() is only defined inside variadic functions, and its second
    // argument must be the last named parameter.
    memcpy(&x1, cp->args, sizeof x1);
    memcpy(&x2, cp->args + sizeof x1, sizeof x2);
    printf("funcadd() = %d\n", a + b + c + x1 + x2);
}
// Closure function with one captured int (i) and two call-time ints.
// Prints all three and returns their sum.
// Precondition: one int was pushed into the closure.
int zFunc(ClosureStruct* cp, int j, int k)
{
    int i;

    // Read the captured value straight from the closure data area;
    // va_start() must not be used in a non-variadic function.
    memcpy(&i, cp->args, sizeof i);
    printf("zFunc() i = %d, j = %d, k = %d\n", i, j, k);
    return i + j + k;
}
// Small by-value payload used to show that structs can be captured too.
typedef struct { char xx[24]; } thing1;

// Closure function with one captured thing1 and one call-time int.
// Prints both and returns 0.
// Precondition: one thing1 was pushed into the closure.
int z2func( ClosureStruct* cp, int i)
{
    thing1 a;

    // Read the captured struct straight from the closure data area;
    // va_start() must not be used in a non-variadic function.
    memcpy(&a, cp->args, sizeof a);
    printf("z2func() i = %d, %s\n", i, a.xx);
    return 0;
}
// Demo: builds three closures, pushes their captured arguments, invokes
// them, and frees each one. A closure whose allocation failed is skipped.
int mainxx(void)
{
    ClosureStruct* p;
    int x;
    thing1 xpxp = { "1234567890123" };

    p = makeClosure(funcadd, 256);
    if (p) {
        x = 4; pushClosureArg(p, sizeof(int), &x);
        x = 10; pushClosureArg(p, sizeof(int), &x);
        p->p(p, 1, 2, 3);
        free(p);
    }

    p = makeClosure(z2func, sizeof(thing1));
    if (p) {
        pushClosureArg(p, sizeof(thing1), &xpxp);
        p->p(p, 45);
        free(p);
    }

    p = makeClosure(zFunc, sizeof(int));
    if (p) {
        x = 5; pushClosureArg(p, sizeof(int), &x);
        p->p(p, 12, 7);
        free(p); // the last closure must be released as well
    }
    return 0;
}
The output from the above usage is:
funcadd() = 20
z2func() i = 45, 1234567890123
zFunc() i = 5, j = 12, k = 7
However there is an issue with the above implementation, you have no way of getting the return value of a function that returns a value. In other words, the function zFunc() used in a closure above returns an int value which is ignored. If you try to capture the return value with something like int k = pint->p(pint, 12, 7); you will get an error message because the function pointer argument of ClosureStruct is void (*p)(); rather than int (*p)();.
To work around this restraint, we will add two C Preprocessor macros to help us create individual versions of the ClosureStruct struct that specify a function return type other than void.
// NAME_CLOSURE(t) pastes the type name `t` onto "ClosureStruct_", so `t`
// must be a single identifier (e.g. int).
// DEF_CLOSURE(t) defines that struct type: same members as ClosureStruct,
// but the function pointer returns `t`. The expansion already ends in a
// semicolon, so the macro is invoked without one.
#define NAME_CLOSURE(t) ClosureStruct_ ## t
#define DEF_CLOSURE(t) \
typedef struct { \
t (*p)(); \
size_t sargs; \
size_t cargs; \
unsigned char* args; \
} NAME_CLOSURE(t);
We then redefine the two functions, zFunc() and z2func(), as follows using the macros.
DEF_CLOSURE(int) // define closure struct that returns an int

// Closure function with one captured int (i) and two call-time ints.
// Prints all three and returns their sum.
// Precondition: one int was pushed into the closure.
int zFunc(NAME_CLOSURE(int)* cp, int j, int k)
{
    int i;

    // Read the captured value straight from the closure data area;
    // va_start() must not be used in a non-variadic function.
    memcpy(&i, cp->args, sizeof i);
    printf("zFunc() i = %d, j = %d, k = %d\n", i, j, k);
    return i + j + k;
}
// Small by-value payload used to show that structs can be captured too.
typedef struct { char xx[24]; } thing1;

// Closure function with one captured thing1 and one call-time int.
// Prints both and returns 0.
// Precondition: one thing1 was pushed into the closure.
int z2func( NAME_CLOSURE(int) * cp, int i)
{
    thing1 a;

    // Read the captured struct straight from the closure data area;
    // va_start() must not be used in a non-variadic function.
    memcpy(&a, cp->args, sizeof a);
    printf("z2func() i = %d, %s\n", i, a.xx);
    return 0;
}
And we use this as follows:
// Demo: a void closure (funcadd) plus two int-returning closures whose
// results are captured in `k`. Every closure is freed after use, and a
// closure whose allocation failed is skipped.
int mainxx(void)
{
    ClosureStruct* p;
    NAME_CLOSURE(int) *pint;
    int x;
    int k = 0;
    thing1 xpxp = { "1234567890123" };

    p = makeClosure(funcadd, 256);
    if (p) {
        x = 4; pushClosureArg(p, sizeof(int), &x);
        x = 10; pushClosureArg(p, sizeof(int), &x);
        p->p(p, 1, 2, 3);
        free(p);
    }

    pint = makeClosure(z2func, sizeof(thing1));
    if (pint) {
        pushClosureArg(pint, sizeof(thing1), &xpxp);
        k = pint->p(pint, 45);
        free(pint);
    }

    pint = makeClosure(zFunc, sizeof(int));
    if (pint) {
        x = 5; pushClosureArg(pint, sizeof(int), &x);
        k = pint->p(pint, 12, 7);
        free(pint); // the last closure must be released as well
    }
    (void)k; // the results are captured only to show that it is possible
    return 0;
}
First Implementation With Standard C and a Bit of Stretching Here and There
NOTE: The following example depends on a stack based argument passing convention as is used with most x86 32 bit compilers. Most compilers also allow for a calling convention to be specified other than stack based argument passing such as the __fastcall modifier of Visual Studio. The default for x64 and 64 bit Visual Studio is to use the __fastcall convention by default so that function arguments are passed in registers and not on the stack. See Overview of x64 Calling Conventions in the Microsoft MSDN as well as How to set function arguments in assembly during runtime in a 64bit application on Windows? as well as the various answers and comments in How are variable arguments implemented in gcc? .
One thing that we can do to solve this problem of providing some kind of closure facility for C is to simplify the problem. Better to provide an 80% solution that is useful for a majority of applications than no solution at all.
One such simplification is to only support functions that do not return a value, in other words functions declared as void func_name(). We are also going to give up compile time type checking of the function argument list since this approach builds the function argument list at run time. Neither one of these things that we are giving up are trivial so the question is whether the value of this approach to closures in C outweighs what we are giving up.
First of all lets define our closure data area. The closure data area represents the memory area we are going to use to contain the information we need for a closure. The minimum amount of data I can think of is a pointer to the function to execute and a copy of the data to be provided to the function as arguments.
In this case we are going to provide any captured state data needed by the function as an argument to the function.
We also want to have some basic safe guards in place so that we will fail reasonably safely. Unfortunately the safety rails are a bit weak with some of the work arounds we are using to implement a form of closures.
The Source Code
The following source code was developed using Visual Studio 2017 Community Edition in a .c C source file.
The data area is a struct that contains some management data, a pointer to the function, and an open ended data area.
// Closure data area: management fields followed by the argument bytes.
// `args` is declared with one element but marks the start of an open-ended
// area inside the caller-supplied buffer.
typedef struct {
size_t nBytes; // current number of bytes of data
size_t nSize; // maximum size of the data area
void(*pf)(); // pointer to the function to invoke
unsigned char args[1]; // beginning of the data area for function arguments
} ClosureStruct;
Next we create a function that will initialize a closure data area.
// Initializes the caller-supplied memory `pArea` (of `nSize` bytes) as an
// empty closure for function `pf`.
// Returns the closure, or NULL when `pArea` is NULL or `nSize` is too small
// to hold even the management header. In the NULL case `pArea` is left
// untouched and remains owned by the caller.
ClosureStruct * beginClosure(void(*pf)(), int nSize, void *pArea)
{
    ClosureStruct *p = pArea;

    // A negative or too-small size would make the unsigned subtraction below
    // wrap around to a huge "capacity" and allow writes past the buffer.
    if (p == NULL || nSize < 0 || (size_t)nSize < sizeof(ClosureStruct)) {
        return NULL;
    }
    p->nBytes = 0;                                     // number of bytes of the data area in use
    p->nSize = (size_t)nSize - sizeof(ClosureStruct);  // max size of the data area
    p->pf = pf;                                        // pointer to the function to invoke
    return p;
}
This function is designed to accept a pointer to a data area which gives flexibility as to how the user of the function wants to manage memory. They can either use some memory on the stack or static memory or they can use heap memory via the malloc() function.
// Variant 1: the closure lives in a caller-declared buffer, so nothing
// needs to be freed. The size passed must match the buffer's size.
unsigned char closure_area[512];
ClosureStruct *p = beginClosure (xFunc, 512, closure_area);
or
// Variant 2: the closure lives in heap memory obtained from malloc().
ClosureStruct *p = beginClosure (xFunc, 512, malloc(512));
// do things with the closure
free (p); // free the malloced memory.
Next we provide a function that allows us to add data and arguments to our closure. The purpose of this function is to build up the closure data so that when closure function is invoked, the closure function will be provided any data it needs to do its job.
// Appends `size` bytes of the variadic argument to the closure's data area
// and returns `p`. Nothing is copied when `p` is NULL or when
// nBytes + size is not strictly below nSize.
//
// NOTE(review): `jj` is handed to memcpy() as if a va_list were a plain
// pointer to the caller's argument bytes. That holds only for stack-based
// argument passing where va_list is a simple pointer; it is not portable.
ClosureStruct * pushDataClosure(ClosureStruct *p, size_t size, ...)
{
if (p && p->nBytes + size < p->nSize) {
va_list jj;
va_start(jj, size); // get the address of the first argument
memcpy(p->args + p->nBytes, jj, size); // copy the specified size to the closure memory area.
p->nBytes += size; // keep up with how many total bytes we have copied
va_end(jj);
}
return p;
}
And to make this a bit simpler to use, let's provide a wrapping macro, which is generally handy but does have limitations since it is C Preprocessor text manipulation.
// Pushes `d` onto closure `cs`, taking the byte count from sizeof(d).
// `d` appears twice in the expansion: once inside sizeof (not evaluated for
// ordinary, non-VLA operands) and once as the value that is passed.
#define PUSHDATA(cs,d) pushDataClosure((cs),sizeof(d),(d))
so we could then use something like the following source code:
// Usage sketch: build a closure in a local buffer, push two int arguments,
// then invoke it.
unsigned char closurearea[256];
int iValue = 34;
ClosureStruct *dd = PUSHDATA(beginClosure(z2func, 256, closurearea), iValue);
dd = PUSHDATA(dd, 68);
execClosure(dd);
Invoking the Closure: The execClosure() Function
The last piece to this is the execClosure() function to execute the closure function with its data. What we are doing in this function is to copy the argument list supplied in the closure data structure onto the stack as we invoke the function.
What we do is cast the args area of the closure data to a pointer to a struct containing an unsigned char array and then dereference the pointer so that the C compiler will put a copy of the arguments onto the stack before it calls the function in the closure.
To make it easier to create the execClosure() function, we will create a macro that makes it easy to create the various sizes of structs we need.
// helper macro to reduce typing and the chance of typing errors.
// If closure `p` holds fewer than `n` argument bytes, view its data area as
// a struct of `n` bytes and pass that struct by value to the closure
// function.
// The expansion is a bare `if` statement so that invocations can be chained
// with `else`. Every use of `p` is parenthesized so that an expression
// argument is evaluated as one unit.
#define CLOSEURESIZE(p,n) if ((p)->nBytes < (n)) { \
    struct {\
        unsigned char x[n];\
    } *px = (void *)(p)->args;\
    (p)->pf(*px);\
}
Then we use this macro to create a series of tests to determine how to call the closure function. The sizes chosen here may need tweaking for particular applications. These sizes are arbitrary and since the closure data will rarely be of the same size, this is not efficiently using stack space. And there is the possibility that there may be more closure data than we have allowed for.
// execute a closure by calling the function through the function pointer
// provided along with the created list of arguments.
// Returns `p` unchanged (NULL stays NULL).
// The size tests run in ascending order and the first one that is strictly
// greater than p->nBytes makes the call. If nBytes is 2048 or more, no test
// matches and the function is not called at all.
ClosureStruct * execClosure(ClosureStruct *p)
{
if (p) {
// the following structs are used to allocate a specified size of
// memory on the stack which is then filled with a copy of the
// function argument list provided in the closure data.
CLOSEURESIZE(p,64)
else CLOSEURESIZE(p, 128)
else CLOSEURESIZE(p, 256)
else CLOSEURESIZE(p, 512)
else CLOSEURESIZE(p, 1024)
else CLOSEURESIZE(p, 1536)
else CLOSEURESIZE(p, 2048)
}
return p;
}
We return the pointer to the closure in order to make it easily available.
An Example Using the Library Developed
We can use the above as follows. First a couple of example functions that don't really do much.
// Example closure target: prints its three arguments and returns their sum.
int zFunc(int i, int j, int k)
{
    const int total = i + j + k;

    printf("zFunc i = %d, j = %d, k = %d\n", i, j, k);
    return total;
}
// Small fixed-size text payload, used to show a struct being passed by value.
typedef struct { char xx[24]; } thing1;

// Example closure target: prints i followed by the string held in a.xx.
// Always returns 0.
int z2func(thing1 a, int i)
{
    const char *text = a.xx;

    printf("i = %d, %s\n", i, text);
    return 0;
}
Next we build our closures and execute them.
// Demo: build three closures and execute each of them.
{
unsigned char closurearea[256];
thing1 xpxp = { "1234567890123" };
thing1 *ypyp = &xpxp;
int iValue = 45;
// Closure 1: z2func with a malloc'd 256-byte data area; arguments are xpxp, then iValue.
// execClosure() returns the pointer it was given, which is handed to free() here.
// NOTE(review): presumably beginClosure()/PUSHDATA hand back the start of the
// malloc'd area, and the malloc result is not checked here — confirm.
ClosureStruct *dd = PUSHDATA(beginClosure(z2func, 256, malloc(256)), xpxp);
free(execClosure(PUSHDATA(dd, iValue)));
// Closure 2: z2func again, this time using the local array as the data area;
// arguments are a copy of *ypyp and the literal 68.
dd = PUSHDATA(beginClosure(z2func, 256, closurearea), *ypyp);
dd = PUSHDATA(dd, 68);
execClosure(dd);
// Closure 3: zFunc with the three int arguments 45, 145 and 185, reusing the local array.
dd = PUSHDATA(beginClosure(zFunc, 256, closurearea), iValue);
dd = PUSHDATA(dd, 145);
dd = PUSHDATA(dd, 185);
execClosure(dd);
}
Which gives an output of
i = 45, 1234567890123
i = 68, 1234567890123
zFunc i = 45, j = 145, k = 185
Well What About Currying?
Next we could make a modification to our closure struct to allow us to do currying of functions.
// Closure record with currying support: a function pointer plus a byte area
// holding the arguments to pass when the function is invoked.
typedef struct {
size_t nBytes; // current number of bytes of data
size_t nSize; // maximum size of the data area
size_t nCurry; // last saved nBytes for curry and additional arguments
// declared without a parameter list so that any argument list can be passed
void(*pf)(); // pointer to the function to invoke
unsigned char args[1]; // beginning of the data area for function arguments
} ClosureStruct;
with the supporting functions for currying and resetting of a curry point being
// Record the current end of the argument data as the curry point, so that
// arguments pushed afterwards can later be discarded with resetCurryClosure().
// Returns p to allow chaining. A NULL p is passed straight through, the same
// way execClosure() treats it.
ClosureStruct *curryClosure(ClosureStruct *p)
{
    if (p) {
        p->nCurry = p->nBytes;
    }
    return p;
}
// Rewind the argument data to the saved curry point, dropping whatever was
// pushed after the last curryClosure() call.
// Returns p to allow chaining. A NULL p is passed straight through, so the
// result of execClosure() (which may be NULL) can be fed in directly.
ClosureStruct *resetCurryClosure(ClosureStruct *p)
{
    if (p) {
        p->nBytes = p->nCurry;
    }
    return p;
}
The source code for testing this could be:
// Demo: the same three closures as before, with the last one curried.
{
unsigned char closurearea[256];
thing1 xpxp = { "1234567890123" };
thing1 *ypyp = &xpxp;
int iValue = 45;
// Closure 1: z2func with a malloc'd data area, freed through execClosure()'s return value.
ClosureStruct *dd = PUSHDATA(beginClosure(z2func, 256, malloc(256)), xpxp);
free(execClosure(PUSHDATA(dd, iValue)));
// Closure 2: z2func using the local array as the data area.
dd = PUSHDATA(beginClosure(z2func, 256, closurearea), *ypyp);
dd = PUSHDATA(dd, 68);
execClosure(dd);
// Closure 3: push the first two zFunc arguments, then save that state as the curry point.
dd = PUSHDATA(beginClosure(zFunc, 256, closurearea), iValue);
dd = PUSHDATA(dd, 145);
dd = curryClosure(dd);
// Each line pushes a third argument, runs zFunc, then restores nBytes to the
// curry point so the next line can supply a different third argument.
dd = resetCurryClosure(execClosure(PUSHDATA(dd, 185)));
dd = resetCurryClosure(execClosure(PUSHDATA(dd, 295)));
}
with the output of
i = 45, 1234567890123
i = 68, 1234567890123
zFunc i = 45, j = 145, k = 185
zFunc i = 45, j = 145, k = 295
GCC and clang have the blocks extension, which is essentially closures in C.
GCC supports inner functions, but not closures. C++0x will have closures. No version of C that I'm aware of, and certainly no standard version, provides that level of awesome.
Phoenix, which is part of Boost, provides closures in C++.
On this page you can find a description on how to do closures in C:
http://brodowsky.it-sky.net/2014/06/20/closures-in-c-and-scala/
The idea is that a struct is needed and that struct contains the function pointer, but gets provided to the function as first argument. Apart from the fact that it requires a lot of boilerplate code and that memory management is of course an issue, this works and provides the power and possibilities of other languages' closures.
You can achieve this with the -fblocks flag, but it does not look as nice as in JS or TS:
#include <stdio.h>
#include <stdlib.h>
#include <Block.h>
// Allocate one zero-initialized object of type T.
// Evaluates to a T* (NULL when the allocation fails).
// Written as a plain expression: no compiler extension and no reserved
// identifier is needed.
#define NEW(T) ((T*)calloc(1, sizeof(T)))
// Private state of the object; the blocks created in object_create() read
// and write it through a captured pointer.
typedef struct data_t {
int value;
} data_t;
// Public face of the object: three block-typed members (clang blocks
// extension, `^` declarators) that play the role of methods.
typedef struct object_t {
int (^get)(void); // returns the stored value
void (^set)(int); // replaces the stored value
void (^free)(void); // releases the private data and the object itself
} object_t;
// Build an object whose get/set/free members are blocks sharing one
// heap-allocated data_t, initialized to 123.
// Returns NULL if either allocation fails; otherwise the caller releases the
// object by invoking its free block.
object_t const* object_create(void) {
    data_t* priv = NEW(data_t);
    object_t* pub = NEW(object_t);
    if (priv == NULL || pub == NULL) {
        // free(NULL) is a no-op, so both can be released unconditionally
        free(priv);
        free(pub);
        return NULL;
    }
    priv->value = 123;
    pub->get = Block_copy(^{
        return priv->value;
    });
    pub->set = Block_copy(^(int value){
        priv->value = value;
    });
    // NOTE(review): none of the three Block_copy() results is ever passed to
    // Block_release(), so the copied blocks presumably outlive the object — confirm.
    pub->free = Block_copy(^{
        free(priv);
        free(pub);
    });
    return pub;
}
int main() {
object_t const* obj = object_create();
printf("before: %d\n", obj->get());
obj->set(321);
printf("after: %d\n", obj->get());
obj->free();
return 0;
}
clang main.c -o main.o -fblocks -fsanitize=address; ./main.o
before: 123
after: 321
The idiomatic way of doing it in C is passing a function pointer and a void pointer to the context.
However, some time ago I came up with a different approach. Surprisingly, there is a family of builtin types in C that carries both the data and the code itself. Those are pointers to a function pointer.
The trick is to use this single object to pass the code, by dereferencing it to obtain the function pointer, and then to pass the very same double function pointer as the context in the first argument. It looks a bit convoluted, but it actually results in a very flexible and readable mechanism for closures.
See the code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// typedef-ing function types usually makes code more readable
// Signature shared by every closure body: (context pointer, argument) -> result.
typedef double double_fun_t(void*, double);
// Closure record for x -> exp(x / temperature): the function pointer
// followed by the state that function reads.
struct exponential {
// closure must be placed as the first member to allow safe casting
// between a pointer to `closure` and `struct exponential`
double_fun_t *closure;
double temperature; // divisor applied to x before exp()
};
double exponential(void *ctx_, double x) {
struct exponential *ctx = ctx_;
return exp(x / ctx->temperature);
}
// the "constructor" of the closure for exponential
// Allocates a struct exponential, fills it in, and returns the address of its
// first member (the function pointer), which doubles as the context pointer.
// Returns NULL when the allocation fails; otherwise the caller owns the
// result and releases it with free().
double_fun_t **make_exponential(double temperature) {
    struct exponential *e = malloc(sizeof *e);
    if (e == NULL) {
        return NULL;
    }
    e->closure = exponential;
    e->temperature = temperature;
    return &e->closure;
}
// A closure body that needs no captured state: the pure mapping x -> x*x.
// The context argument is accepted only to match the closure signature.
double square(void *_unused, double x){
    const double squared = x * x;

    (void)_unused;
    return squared;
}
// use compound literal to transform a function to a closure
// The literal is an unnamed `double_fun_t*` object holding `square`; being at
// file scope it has static storage duration, so its address stays valid.
double_fun_t **square_closure = & (double_fun_t*) { square };
// Worker that consumes a closure: applies it to the first `length` elements
// of `liste` and returns the sum of the results (0 when length <= 0).
// The parameter is spelled `double(**)(void*,double)` rather than with
// `double_fun_t` to show that only builtin types are involved.
double somme(double* liste, int length, double (**fun)(void*,double)){
    double total = 0;
    int idx = 0;

    while (idx < length) {
        // calling a closure: `fun` is dereferenced to obtain the function
        // pointer and is also passed along as the context argument
        total = total + (*fun)(fun, liste[idx]);
        ++idx;
    }
    return total;
}
// Run somme() over the same list with a static closure and a heap-allocated one.
int main(void) {
    double list[3] = { 1, 2, 3 };
    printf("%g\n", somme(list, 3, square_closure));
    // a dynamic closure
    double_fun_t **exp_closure = make_exponential(42);
    if (exp_closure == NULL) {
        // somme() dereferences the closure pointer, so stop on a failed allocation
        return EXIT_FAILURE;
    }
    printf("%g\n", somme(list, 3, exp_closure));
    free(exp_closure);
    return 0;
}
The advantage of this approach is that the closure exports a pure interface for calling double->double functions. There is no need to introduce any boxing structures used by all clients of the closure. The only requirement is the "calling convention" which is very natural and does not require sharing any code.
Answer
#include <stdio.h>
#include <stdlib.h>
/*
File Conventions
----------------
alignment: similar statements only
int a = 10;
int* omg = {120, 5};
functions: dofunction(a, b, c);
macros: _do_macro(a, b, c);
variables: int dovariable=10;
*/
////Macros
// Compare two int values; on mismatch report the line number and both values
// on stderr, run the supplied teardown code, and return EXIT_FAILURE from the
// enclosing function.
// There is deliberately no ';' after while(0): the caller's own ';' completes
// the statement, which keeps `if (c) _assert(...); else ...` valid.
#define _assert(got, expected, teardownmacro) \
    do { \
        if((got)!=(expected)) { \
            fprintf(stderr, "line %i: ", __LINE__); \
            fprintf(stderr, "%i != %i\n", (got), (expected)); \
            teardownmacro; \
            return EXIT_FAILURE; \
        } \
    } while(0)
////Internal Helpers
// Announce on stderr that the test run is beginning.
static void istarted() {
    fputs("Start tests\n", stderr);
}
// Announce on stderr that the test run has finished.
static void iended() {
    fputs("End tests\n", stderr);
}
////Tests
// Test driver. The two macros defined inside act as "closures" over main's
// local variables: they read and write localvar/localptr without taking them
// as parameters.
int main(void)
{
    ///Environment
    int localvar = 0;
    int* localptr = NULL;
    ///Closures
    // Neither macro ends in ';' after while(0): the ';' written at the call
    // site completes the statement, so each use is exactly one statement.
#define _setup_test(mvar, msize) \
    do { \
        localptr=calloc((msize), sizeof(int)); \
        localvar=(mvar); \
    } while(0)
#define _teardown_test() \
    do { \
        free(localptr); \
        localptr=NULL; \
    } while(0)
    ///Tests
    istarted();
    _setup_test(10, 2);
    _assert(localvar, 10, _teardown_test());
    _teardown_test();
    _setup_test(100, 5);
    _assert(localvar, 100, _teardown_test());
    _teardown_test();
    iended();
    return EXIT_SUCCESS;
}
Context
I was curious about how others accomplished this in C. I wasn't totally surprised when I didn't see this answer. Warning: This answer is not for beginners.
I live a lot more in the Unix style of thinking: lots of my personal programs and libraries are small and do one thing very well. Macros as "closures" are much safer in this context. I believe all the organization and specified conventions for readability is super important, so the code is readable by us later, and a macro looks like a macro and a function looks like a function. To clarify, not literally these personal conventions, just having some, that are specified and followed to distinguish different language constructs (macros and functions). We all should be doing that anyway.
Don't be afraid of macros. When it makes sense: use them. The advanced part is the when. My example is one example of the whens. They are ridiculously powerful and not that scary.
Rambling
I sometimes use a proper closure/lambda in other languages to execute a set of expressions over and over within a function. It's a little context aware private helper function. Regardless of its proper definition, that's something a closure can do. It helps me write less code. Another benefit of this is you don't need to reference a struct to know how to use it or understand what it's doing. The other answers do not have this benefit, and, if it wasn't obvious I hold readability very highly. I strive for simple legible solutions. This one time I wrote an iOS app and it was wonderful and as simple as I could get it. Then I wrote the same "app" in bash in like 5 lines of code and cursed.
Also embedded systems.
Is it possible to recast a variable permanently, or have a wrapper function such that the variable would behave like another type?
I would want to achieve something I posted in the other question:
Typecasting variable with another typedef
Update: Added GCC as compiler. It may have an extension that would help?
Yes, you can cast a variable from one type to another:
// x keeps its type int; the cast only converts the value stored into y.
int x = 5;
double y = (double) x; // <== this is what a cast looks like
However, you cannot modify the type of the identifier 'x' in-place, if that is what you are asking. Close to that, though, you can introduce another scope with that identifier redeclared with some new type:
int x = 5;
double y = (double) x;
// Inner block scope: a declaration here hides the outer one until the closing brace.
{
double x = y; // NOTE: this isn't the same as the 'x' identifier above
// ...
}
// NOTE: the symbol 'x' reverts to its previous meaning here.
Another thing you could do, though it is really a horrible, horrible idea is:
int x = 5;
// Keep the converted value in a separately named double variable.
double new_version_of_x = (double) x; // Let's make 'x' mean this
#define x new_version_of_x
// The line above is pure evil, don't actually do it, but yes,
// all lines after this one will think 'x' has type double instead
// of int, because the text 'x' has been rewritten to refer to
// 'new_version_of_x'. This will likely lead to all sorts of havoc
You accomplish that by casting then assigning.
// Sketch: treat the untyped pointer p as a pointer to int for the rest of
// the function. p must point to a valid int.
// Returns the int that p points to, so the function no longer falls off the
// end without a value.
int f(void * p) {
    int * i;
    i = (int *)p;
    //lots of code here with the i pointer, and every line
    //really thinks that it is an int pointer and will treat it as such
    return *i;
}
EDIT From the other question you linked:
// Three separate one-byte members.
typedef struct {
unsigned char a;
unsigned char b;
unsigned char c;
} type_a;
// The same three bytes arranged as one member followed by a two-element array.
typedef struct {
unsigned char e;
unsigned char f[2];
} type_b;
//initialize type a
// Each member is assigned individually, giving the values 1, 2, 3.
type_a sample;
sample.a = 1;
sample.b = 2;
sample.c = 3;
Now sample is initialized, but you want to access it differently, you want to pretend that in fact that variable has another type, so you declare a pointer to the type you want to "disguise" sample as:
// Pointer of the type that sample should be viewed as.
type_b * not_really_b;
// Reinterpret sample's storage through the other struct type; nothing is copied or converted.
// NOTE(review): assumes type_a and type_b share the same layout (three
// consecutive unsigned char, no padding) — confirm for the target compiler.
not_really_b = (type_b*)&sample;
See, that is the whole magic.
not_really_b->e is equal 1
not_really_b->f[0] is equal 2
not_really_b->f[1] is equal 3
Does this answer your question?
The other answers are better (declare a variable of the type you want, and do an assignment). If that's not what you're asking for, you could use a macro:
// NOTE(review): i has no initializer in this fragment; if it is a local
// variable it must be assigned before the printf calls — confirm.
long i;
// Expands to a cast at each use site, so i is converted to int wherever the macro appears.
#define i_as_int ((int)i)
printf( "i = %ld\n", i);
printf( "i = %d\n", i_as_int);
But wouldn't it be clearer to just say (int) i if that's what you mean?
As long as you realize in C pointers are nothing but addresses of memory
locations of certain types, you should have your answer. For example the
following program will print the name of the file
// Shows a pointer value surviving a round trip through another pointer
// type: both printf calls print the string that argv[0] points to.
int main(int argc, char *argv[]) {
int *i;
// Store the char* in an int*; i is never dereferenced as an int.
// NOTE(review): converting to int* is only well-defined when the address is
// suitably aligned for int — confirm on the target platform.
i = (int *) argv[0];
printf("%s\n", argv[0]);
// Cast back to char* before using the pointer as a string.
printf("%s\n", ((char *) i));
}