A good C equivalent of STL vector? - c

I've noticed that at several places in our code base we use dynamically expanding arrays, i.e. a base array coupled with an element counter and a "max elements" value.
What I want to do is replace these with a common data structure and utility functions, for the usual object-oriented reasons.
The array elements can be either basic data types or structs, I need fast random access to the elements, and preferably a type-safe implementation.
So, basically, what I would like to use is an STL vector, but the code base is restricted to C89 so I have to come up with something else :-)
I gave it some thought and whipped up this initial draft, just to show what I'm aiming at:
/* Type-safe dynamic list in C89 */
#define list_declare(type) typedef struct _##type##_list_t { type * base_array; size_t elements; size_t max_size; } type##_list_t
#define list(type) type##_list_t
#define list_new(type, initial_size) { calloc(initial_size, sizeof(type)), 0, initial_size }
#define list_free(list) free(list.base_array)
/* Store `element` at index `place`. The index must be below the current
   element count; anything else is out of bounds and is ignored.
   The do/while(0) wrapper makes the macro a single statement, so it is safe
   inside an unbraced if/else. `list` and `place` are evaluated more than once. */
#define list_set(list, place, element) do { if ( (size_t)(place) < (list).elements ) { (list).base_array[(place)] = (element); } else { /* Array index out of bounds */ } } while (0)
#define list_add(list, element) if ( list.elements < list.max_size ) { list.base_array[list.elements++] = element; } else { /* Expand array then add */ }
#define list_get(list, n) list.base_array[n]
/* Sample usage: */
list_declare(int);
int main(void)
{
list(int) integers = list_new(int, 10);
printf("list[0] = %d\n", list_get(integers, 0));
list_add(integers, 4);
printf("list[0] = %d\n", list_get(integers, 0));
list_set(integers, 0, 3);
printf("list[0] = %d\n", list_get(integers, 0));
list_free(integers);
return EXIT_SUCCESS;
}
...however, there must be someone else who has done this before. I'm aware of the FreeBSD sys/queue.h implementation of a similar concept for some different queues, but I can't find anything like that for arrays.
Is anyone here any wiser?

glib provides a GArray type, which implements a dynamically growing array. If you can use external third-party libraries, glib is almost always a good choice as a "standard" library for C. It provides types for all basic data structures, for Unicode strings, for date and time values, and so on.

Here is a simple vector replacement: it is ONE function for everything, strictly C89, and thread-safe.
Libraries are too difficult for me, so I use my own.
It is not fast, but it is easy to use.
/* owner-structs too */
typedef struct {
    char name[20], city[20];
    int salary;
} My, *Myp;
typedef char Str80[80];
/* add here your type with its size */
typedef enum {SPTR,INT=sizeof(int),DOUBLE=sizeof(double),S80=sizeof(Str80),MY=sizeof(My)} TSizes;
typedef enum {ADD,LOOP,COUNT,FREE,GETAT,GET,REMOVEAT,REMOVE} Ops;

/*
 * One entry point for every operation on a NULL-terminated array of pointers
 * to heap-allocated element copies. *root must be NULL before the first ADD.
 * The newest element is stored at (*root)[0]; the indices used by GETAT, GET
 * and REMOVEAT count from the oldest element (index 0 = first one added).
 *
 *   root  address of the array pointer; updated by ADD and FREE
 *   ts    element size in bytes; SPTR (0) means elements are C strings
 *   op    operation to perform (see below)
 *   in    operation input, out  operation output
 *
 *   ADD       in = element to copy.                        Returns 0.
 *             If memory cannot be obtained the array is left unchanged.
 *   LOOP      in = void (*)(char *) callback, called for every element from
 *             oldest to newest.                            Returns 0.
 *   COUNT     out = int * receiving the element count.     Returns out.
 *   FREE      frees all elements and the array, sets *root to NULL. Returns 0.
 *   GETAT     in = size_t * index.  Returns the element, or 0 if out of range.
 *   GET       in = element to look for, out = int * receiving its index or -1.
 *             Returns out.
 *   REMOVEAT  in = size_t * index.  Returns in on success, 0 if out of range.
 *   REMOVE    in = element; removes every element equal to it. Returns 0.
 */
void *dynarray(char ***root, TSizes ts, Ops op, void *in, void *out)
{
    char **arr = *root;
    size_t count = 0;   /* stored elements, terminator excluded */
    size_t size = 0;    /* bytes per element; only ADD and GET need it */

    if (arr)
        while (arr[count])
            ++count;

    /* Only ADD and GET receive an element in `in`; for the other operations
       `in` is a callback or an index and must not be measured with strlen. */
    if (in && (op == ADD || op == GET))
        size = ts ? (size_t)ts : strlen((char *)in) + 1;

    switch (op) {
    case ADD: {
        char *copy;
        char **grown;
        if (!in)
            break;
        copy = malloc(size);
        if (!copy)
            break;
        /* room for the new element plus the terminator */
        grown = realloc(arr, (count + 2) * sizeof *grown);
        if (!grown) {
            free(copy);
            break;
        }
        memcpy(copy, in, size);
        memmove(grown + 1, grown, count * sizeof *grown);
        grown[0] = copy;
        grown[count + 1] = NULL;
        *root = grown;
        break;
    }
    case LOOP: {
        void (*visit)(char *) = (void (*)(char *))in;
        if (!visit)
            break;
        while (count--)
            visit(arr[count]);
        break;
    }
    case COUNT:
        if (!out)
            break;
        *(int *)out = (int)count;
        return out;
    case FREE:
        if (arr) {
            while (count--)
                free(arr[count]);
            free(arr);
            *root = NULL;
        }
        break;
    case GETAT: {
        size_t i;
        if (!in)
            break;
        i = *(size_t *)in;
        if (i < count)
            return arr[count - 1 - i];
        break;
    }
    case GET: {
        int pos = 0;
        if (!out)
            break;
        if (in) {
            while (count--) {
                int same = ts
                    ? memcmp(in, arr[count], size) == 0
                    : strncmp((const char *)in, arr[count], size) == 0;
                if (same) {
                    *(int *)out = pos;
                    return out;
                }
                ++pos;
            }
        }
        *(int *)out = -1;
        return out;
    }
    case REMOVEAT: {
        size_t i, slot;
        if (!in)
            break;
        i = *(size_t *)in;
        if (i >= count)
            break;
        slot = count - 1 - i;
        free(arr[slot]);
        /* i older elements follow the slot, plus the NULL terminator */
        memmove(&arr[slot], &arr[slot + 1], (i + 1) * sizeof *arr);
        return in;
    }
    case REMOVE: {
        int pos = -1;
        size_t idx;
        for (;;) {
            dynarray(root, ts, GET, in, &pos);
            if (pos < 0)
                break;
            idx = (size_t)pos;
            if (!dynarray(root, ts, REMOVEAT, &idx, 0))
                break;
        }
        break;
    }
    default:
        break;
    }
    return 0;
}
/* LOOP callback for My records: prints "name,city,salary" preceded by a newline. */
void outmy(Myp s)
{
    const My *rec = s;

    printf("\n%s,%s,%d", rec->name, rec->city, rec->salary);
}
/*
 * Demo driver for dynarray(): exercises the same array pointer with strings,
 * fixed-size Str80 buffers and My structs, freeing the array between runs.
 */
int main(void)
{
    My z[]={{"Buffet","Omaha",INT_MAX},{"Jobs","Palo Alto",1},{"Madoff","NYC",INT_MIN}};
    Str80 y[]={ "123","456","7890" };
    char **ptr=0;
    int x=1;      /* receives the int results of GET and COUNT */
    size_t at=1;  /* REMOVEAT reads its index through a size_t pointer, so an int must not be passed */
    /* precondition for first use: ptr==NULL */
    dynarray(&ptr,SPTR,ADD,"test1.txt",0);
    dynarray(&ptr,SPTR,ADD,"test2.txt",0);
    dynarray(&ptr,SPTR,ADD,"t3.txt",0);
    dynarray(&ptr,SPTR,REMOVEAT,&at,0); /* remove at index/key ==1 */
    dynarray(&ptr,SPTR,REMOVE,"test1.txt",0);
    dynarray(&ptr,SPTR,GET,"t3.txt",&x);
    dynarray(&ptr,SPTR,LOOP,puts,0);
    /* another option for enumerating */
    dynarray(&ptr,SPTR,COUNT,0,&x);
    while( x-- )
        puts(ptr[x]);
    dynarray(&ptr,SPTR,FREE,0,0); /* frees all mallocs and set ptr to NULL */
    /* start for another (user)type */
    dynarray(&ptr,S80,ADD,y[0],0);
    dynarray(&ptr,S80,ADD,y[1],0);
    dynarray(&ptr,S80,ADD,y[2],0);
    dynarray(&ptr,S80,ADD,y[0],0);
    dynarray(&ptr,S80,LOOP,puts,0);
    dynarray(&ptr,S80,FREE,0,0); /* frees all mallocs and set ptr to NULL */
    /* start for another (user)struct-type */
    dynarray(&ptr,MY,ADD,&z[0],0);
    dynarray(&ptr,MY,ADD,&z[1],0);
    dynarray(&ptr,MY,ADD,&z[2],0);
    dynarray(&ptr,MY,ADD,&z[0],0);
    dynarray(&ptr,MY,LOOP,outmy,0);
    dynarray(&ptr,MY,FREE,0,0);
    return 0;
}

There is sglib, which implements various lists,hashmaps and rbtrees in a generic fashion (i.e. by specializing over a type). There is also a fast sorting function for arrays:
http://sglib.sourceforge.net/

qLibc implements a vector in pure C. The data structure allows it to store any type of object like (void *object) and it provides convenient wrappers for string, formatted string and integer types.
Here's a sample code for your idea.
qvector_t *vector = qvector(QVECTOR_OPT_THREADSAFE);
vector->addstr(vector, "Hello");
vector->addstrf(vector, "World %d", 123);
char *finalstring = vector->tostring(vector);
printf("%s", finalstring);
free(finalstring)
vector->free(vector);
for object type:
int a = 1, b = 2;
qvector_t *vector = qvector(QVECTOR_OPT_THREADSAFE);
vector->add(vector, (void *)&a, sizeof(int));
vector->add(vector, (void *)&b, sizeof(int));
int *finalarray = vector->toarray(vector);
printf("a = %d, b = %d", finalarray[0], finalarray[1]);
free(finalarray)
vector->free(vector);
Note: I made this sample code just for your reference, copying from the library's example code.
It might contain typos.
You can check out the Full API reference at http://wolkykim.github.io/qlibc/

I'm using the following macro implementation without problems so far. It isn't a complete implementation but grows the array automatically :
#define DECLARE_DYN_ARRAY(T) \
typedef struct \
{ \
T *buf; \
size_t n; \
size_t reserved; \
} T ## Array;
#define DYN_ARRAY(T) T ## Array
#define DYN_ADD(array, value, errorLabel) DYN_ADD_REALLOC(array, value, errorLabel, realloc)
/*
 * Append `value` to `array`, growing the buffer through `realloc_fn` when it
 * is full. The first growth reserves 20 elements; later growths double the
 * capacity. On allocation failure (or if the new size would overflow) control
 * jumps to `errorLabel` with the array completely unchanged: `reserved` is
 * only updated after the new buffer has been obtained, so it never claims
 * more room than `buf` really has.
 * The expansion is a plain brace block, so it may be used with or without a
 * trailing semicolon; brace the surrounding if/else when using it there.
 * `array` is evaluated several times and must not have side effects.
 */
#define DYN_ADD_REALLOC(array, value, errorLabel, realloc_fn) \
{ \
    if ((array).n >= (array).reserved) \
    { \
        size_t dyn_new_cap_ = (array).reserved ? (array).reserved * 2 : 20; \
        void *dyn_new_buf_; \
        if (dyn_new_cap_ <= (array).reserved || \
            dyn_new_cap_ > (size_t)-1 / sizeof(*(array).buf)) goto errorLabel; \
        dyn_new_buf_ = realloc_fn((array).buf, sizeof(*(array).buf) * dyn_new_cap_); \
        if (!dyn_new_buf_) goto errorLabel; \
        (array).buf = dyn_new_buf_; \
        (array).reserved = dyn_new_cap_; \
    } \
    (array).buf[(array).n++] = (value); \
}
To use you first write: DECLARE_DYN_ARRAY(YourType)
To declare variables you write DYN_ARRAY(YourType) array = {0}.
You add elements with DYN_ADD(array, element, errorLabel).
You access elements with array.buf[i].
You get the number of elements with array.n.
When done you free it with free(array.buf) (or whatever function you used to allocate it.)

I usually roll my own code for purposes such as this, like you did. It's not particularly difficult, but having type safety etc. is not easily achievable without a whole OO framework.
As mentioned before, glib offers what you need - if glib2 is too big for you, you could still go with glib1.2. It's quite old, but doesn't have external dependencies (except for pthread if you need thread support). The code can also be integrated into larger projects, if necessary. It's LGPL licensed.

Personally, I prefer "Gena" library. It closely resembles stl::vector in pure C89.
It is comfortable to use because you can:
Access vector elements just like plain C arrays: vec[k][j];
Have multi-dimensional arrays;
Copy vectors;
Instantiate necessary vector types once in a separate module, instead of doing this every time you needed a vector;
You can choose how to pass values into a vector and how to return them from it: by value or by pointer.
You can check it out here:
https://github.com/cher-nov/Gena

Related

using function names as functions in a C macro

Suppose i have code like this in my program:
if (!strcmp(current, "sin")) {
pushFloat(sin(x), &operands);
} else if (!strcmp(current, "cos")) {
pushFloat(cos(x), &operands);
} else if (!strcmp(current, "tan")) {
pushFloat(tan(x), &operands);
} else if (!strcmp(current, "ctg")) {
pushFloat(1. / tan(x), &operands);
} else if (!strcmp(current, "ln")) {
pushFloat(log(x), &operands);
} else if (!strcmp(current, "sqrt")) {
pushFloat(sqrt(x), &operands);
}
There are function names such as "sin" or "cos" saved in the current char array
Instead of using this long if block, or replacing it with an even longer switch block, i wanted to write a simple macro like this: #define PUSHFUNC(stack, func, value)(pushFloat(func(value), &stack)) and call it like this PUSHFUNC(operands, current, x)
Doing it this way creates an error "current is not a function or function pointer". I initially thought macros are just text replacement, so if i force a string that is equal to an actual function into a macro, it would expand to the function itself, but looks like i was wrong. Is there a way to achieve what i want using a macro, or should i just write a map block?
I initially thought macros are just text replacement,
That's your problem: macros are just text replacement. So if you have:
#define PUSHFUNC(stack, func, value) (pushFloat(func(value), &stack))
And you write:
PUSHFUNC(operands, current, x)
You get:
(pushFloat(current(value), &operands))
And indeed, you have no function named current. Macros are expanded before your code compiles; the preprocessor has no knowledge of the content of your variables.
If you really want to avoid a long chain of if statements, you could implement some sort of table lookup:
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <math.h>
typedef double (*floatop)(double x);
typedef struct {
char *name;
floatop operation;
} entry;
double ctg(double);
entry opertable[] = {
{"sin", sin},
{"cos", cos},
{"tan", tan},
{"ctg", ctg},
{"sqrt", sqrt},
{NULL, NULL},
};
double ctg(double x) {
return 1. / tan(x);
}
/*
 * Look up `name` in opertable (terminated by an entry whose name is NULL)
 * and return the matching operation.
 * Returns NULL when no entry has that name, so callers must check the result
 * before calling through it.
 */
floatop findop(char *name) {
    int i;
    for (i = 0; opertable[i].name; i++) {
        if (strcmp(opertable[i].name, name) == 0) {
            return opertable[i].operation;
        }
    }
    /* Unknown name: return a defined value instead of falling off the end. */
    return NULL;
}
int main() {
float x = 4;
printf("sin(%f) = %f\n", x, findop("sin")(x));
printf("sqrt(%f) = %f\n", x, findop("sqrt")(x));
printf("tan(%f) = %f\n", x, findop("tan")(x));
printf("ctg(%f) = %f\n", x, findop("ctg")(x));
}
...but this requires that all of your functions take the same arguments, so for things like ctg you would need to add a helper function. You also need to decide if the increased complexity of the table lookup makes sense: it really depends on how many different operation names you expect to implement.
The output of the above code is:
sin(4.000000) = -0.756802
sqrt(4.000000) = 2.000000
tan(4.000000) = 1.157821
ctg(4.000000) = 0.863691
Is there a way to achieve what i want using a macro, or should i just write a map block?
I would recommend using an enum containing symbols for all the functions you might want to call, and using that in a switch-case block, instead of comparing a bunch of strings. Here's a very brief sample that only uses some of the functions you refer to...
enum which_func { SIN, COS, TAN, };
enum which_func which = SIN;
switch (which) {
case SIN:
pushFloat(sin(x), &operands);
break;
case COS:
pushFloat(cos(x), &operands);
break;
case TAN:
pushFloat(tan(x), &operands);
break;
default:
assert(false); // shouldn't be reachable if enum value is well-defined
}
This version will be easier to maintain in the long run, more efficient to execute and possibly more robust to logic errors (there are some compiler warnings that you can enable which will warn you if you're not handling all enum values, which can help you catch missed cases in your logic).
To add to what other answers said, what you can do is to make a macro that expands to the "basic block" of your if chain, avoiding some repetitions thanks to the stringizing operator:
#define HANDLE_FN_EXPR(fn, expr) \
else if(!strcmp(current, #fn)) \
pushFloat((expr), &operands)
#define HANDLE_FN(fn) \
HANDLE_FN_EXPR(fn, fn(x))
Then you can do
if(0);
HANDLE_FN(sin);
HANDLE_FN(cos);
HANDLE_FN(tan);
HANDLE_FN_EXPR(ctg, 1./tan(x));
HANDLE_FN(ln);
HANDLE_FN(sqrt);
Macros do in fact do text replacement. Given your macro definition, this:
PUSHFUNC(operands, current, x)
expands to this:
(pushFloat(current(x), &operands))
So as you can see, the text that is being replaced is the name of the variable, not the text that it contains.
And even if this did work as you expected, it wouldn't be able to properly handle the 1. / tan(x) case.
This means there isn't really a better way to do what you want.
Why not create some objects for each function type? I know, this is C not C++, but the idea will still work. First, create the function object type:-
// Pairs the textual name of a function with the routine that implements it.
typedef struct Function
{
    char *name;                          // name to compare the input against, e.g. "sin"
    float (*function) (float argument);  // routine to call when the name matches
} Function;
And now create an array of function objects:-
Function functions [] =
{
{ "sin", sin },
{ "cos", cos }
// and so on
};
where the functions are defined:-
float sin(float x)
{
return 0; // put correct code here
}
float cos(float x)
{
return 0; // put correct code here
}
Finally, parse the input:-
for (int i = 0; i < sizeof functions / sizeof functions[0]; ++i)
{
if (strcmp(functions[i].name, current) == 0)
{
pushFloat(functions[i].function(arg)); // add operands!
break;
}
}
I find using enums for stuff like this very hard to maintain! Adding new functions means going through the code to find every place where the enum is used and updating it, which is prone to errors (like missing a place!).
Just because it's not C++ doesn't mean you can't use objects! It's just that there's no language support for them, so you have to do a bit more work (and, yeah, there are features missing!).

How to pass a string as a macro?

There are many functions in the C libraries that require users to input with macros.
I wonder, if I have an array of strings, with contents of macros, like so:
char s[][3] = {"SIGINT", "SIGKILL", "SIGSTOP"};
How can I pass these strings as macros? (Like so:)
signal(s[0], do_something);
with do_something is a function pointer.
(and yes, technically I can pass ints in this case, but... hypothetically, ya know?)
EDIT:
As @RemyLebeau and SGeorgiades point out, "SIGINT" and the others are aliases for integer constants, and can therefore be stored in an int array, like so:
int s[3] = {SIGINT, SIGKILL, SIGSTOP};
Although SGeorgiades and Remy Lebeau already gave you the answer, here is something that I've used in the past to allow conversion and pretty printing of signal numbers and names:
#include <stdio.h>
#include <signal.h>
#include <string.h>
struct sigfun {
int signo;
const char *signame;
};
#define SIGFUN(_sig) \
{ \
.signo = _sig, \
.signame = #_sig \
}
struct sigfun siglist[] = {
SIGFUN(SIGINT),
SIGFUN(SIGKILL),
SIGFUN(SIGSTOP),
// ...
{ .signo = 0, .signame = NULL }
};
#define SIGFORALL(_sig) \
_sig = siglist; _sig->signame != NULL; ++_sig
/*
 * Translate a signal name such as "SIGINT" into its number.
 * When the name is not in siglist the loop stops on the terminating entry,
 * whose signo is 0, and that value is returned.
 */
int
signame_to_signo(const char *signame)
{
    struct sigfun *entry;

    for (SIGFORALL(entry)) {
        if (strcmp(entry->signame, signame) == 0)
            return entry->signo;
    }

    /* Not found: entry now points at the terminating record. */
    return entry->signo;
}
/*
 * Translate a signal number into its name as spelled in siglist.
 * When the number is not in siglist the loop stops on the terminating entry,
 * whose signame is NULL, and that value is returned.
 */
const char *
signo_to_signame(int signo)
{
    struct sigfun *entry;

    for (SIGFORALL(entry)) {
        if (entry->signo == signo)
            return entry->signame;
    }

    /* Not found: entry now points at the terminating record. */
    return entry->signame;
}
UPDATE:
why not put for into SIGFORALL? –
tstanisl
For a few reasons ...
I've done that before (e.g.):
#define SIGFORALL(_sig) \
for (_sig = siglist; _sig->signame != 0; ++_sig)
SIGFORALL(sig) {
// do stuff
}
This tends to confuse certain IDEs and/or tools that parse the code without running it through the preprocessor.
It's also more difficult for programmers to quickly (without digesting the macro) skip over it.
They don't see a for and have trouble figuring out what SIGFORALL(sig) { does.
Is the macro a wrapper for if, for, or while?
With:
#define SIGFORALL(_sig) \
_sig = siglist; _sig->signame != 0; ++_sig
for (SIGFORALL(sig)) {
// do stuff
}
there is a better chance they can continue around the construct because they can understand (i.e. skip over) the for (...) [syntactically] without having to know what the macro is doing. That is, nobody has to "drill down" into the macro unless they wish to.
Another reason is that without the for in the macro, we can add extra code to the for loop's initialization and iteration expressions. It's more flexible.
For example, I've used a similar macro for linked list traversal and wanted to know the index/count of an element:
#define LLFORALL(_node) \
_node = nodelist; _node != NULL; _node = _node->next
int idx;
for (idx = 0, LLFORALL(node), ++idx) {
if (node->value == 5)
printf("found value at index %d\n",idx);
}
There's no absolute rule about this. Ultimately, it's a [personal] style preference.
Perhaps what you want instead is:
int s[3] = { SIGINT, SIGKILL, SIGSTOP };
signal(s[0], do_something);

How to implement a 'Pop' function that returns the "popped" element (i.e the data/value) ? (linked list stacks)

Confused as to how to implement a single function that would at the same time pop the element and return it as return value.
So far all I've seen are pop functions that return a pointer to the new head of the stack.
Here's a start, but...
#define VALUE int
typedef struct node_t {
VALUE item;
struct node_t *next;
} node;
.
.
.
// Function
VALUE pop(node *stack_head) {
// Used to store the node we will delete
node *deleteNode = stack_head;
// Error Checking // <<====== (btw, is this actually necessary ?)
if (!deleteNode || !stack_head) {
if (!stack_head) fprintf(stderr, "\nPop failed. --> ...\n");
if (!deleteNode) fprintf(stderr, "\nPop Failed. --> ...\n");
return 0;
}
// Storing the value in a variable
VALUE popped_item = stack_head->item;
// Updating the head
stack_head = stack_head->next; <<====== THERE'S A PROBLEM HERE ! (i think)
// Freeing/Deleting the 'popped' node
free(deleteNode);
// Return 'popped' value
return popped_item;
}
.
.
.
stack_head = stack_head->next;
Doesn't actually change the address that the pointer stack_head (i.e the head of the stack) points to... and so the value is indeed returned for the first pop but subsequent pops return errors.
Yes because it is a local variable but then how would you change the actual pointer (the one that points to the head of the stack) to point to the new head of the stack?
The parameter stack_head is local to the function pop, so when you modify it the result is not visible outside of the function.
You need to pass the address of the variable you want to modify, then in the function you dereference the pointer parameter to change what it points to.
So change your function to this:
/*
 * Remove the top node of the stack and return the value it held.
 * stack_head is the address of the caller's head pointer, so the caller's
 * pointer is moved to the next node. On an empty stack a message is written
 * to stderr and 0 is returned.
 */
VALUE pop(node **stack_head) {
    node *top = *stack_head;
    VALUE result;

    if (top == NULL) {
        fprintf(stderr, "\nPop failed. --> ...\n");
        return 0;
    }

    result = top->item;
    *stack_head = top->next;
    free(top);
    return result;
}
And call it like this:
node *stack_head = NULL;
// do something to push onto the stack
VALUE v = pop(&stack_head);
Okay, this will be a pretty long digest, but hopefully worth it. You can see a testcase of the code I've presented as my conclusion here and obtain a modular version of the code here. My suggestion would be that you use a structure like this:
struct {
size_t top;
T value[];
}
The reason you probably shouldn't use classical linked lists for this (or anything, really) is covered by this video courtesy of Bjarne Stroustrup. The basis of the problem is that the majority of your overhead is in allocation and cache misses which don't occur so much when you keep everything in one allocation.
If I were to write this for convenient use, perhaps:
#define stack_of(T) struct { size_t top; T value[]; }
This should allow you to declare empty stacks fairly sensibly, like:
int main(void) {
stack_of(int) *fubar = NULL;
}
This is similar enough to templates in other languages to work fairly well, and also not a hideous abuse of the preprocessor. I'm sure I've written a push_back function somewhere which we can adapt to this version of push which I've linked to externally as it's not important for the conclusion of this answer (bear with me here; we'll come back to that momentarily)...
So now we have stack_of(T) and push(list, value) which we can use like:
int main(void) {
stack_of(int) *fubar = NULL;
push(fubar, 42);
push(fubar, -1);
}
The simplest solution for pop might be something like:
/* Pop the top value: asserts that the stack exists and is not empty, then
   decrements top and yields the element that was on top.
   `list` is evaluated several times, so pass a plain variable. */
#define pop(list) (assert((list) && (list)->top), (list)->value[--(list)->top])
... but this does suffer a drawback we'll discuss later. For now we have as a testcase:
int main(void) {
stack_of(int) *fubar = NULL;
int x;
push(fubar, 42);
push(fubar, -1);
x = pop(fubar); printf("popped: %d\n", x);
x = pop(fubar); printf("popped: %d\n", x);
x = pop(fubar); printf("popped: %d\n", x);
}
... and as you'll see during debug the assert fails during execution telling us we've popped more than we've pushed... probably a good thing to have. Still, this doesn't actually reduce the size of the stack. To do that we actually need something more like push again, except we get rid of these lines:
list->top = y; \
list->value[x] = v; \
So there's an opportunity for refactoring. Thus I bring you operate():
#define operate(list, ...) { \
size_t x = list ? list->top : 0 \
, y = x + 1; \
if ((x & y) == 0) { \
void *temp = realloc(list, sizeof *list \
+ (x + y) * sizeof list->value[0]); \
if (!temp) \
return EXIT_FAILURE; \
list = temp; \
} \
__VA_ARGS__; \
}
Now we can redefine push in terms of operate:
#define push(list, v) operate(list, list->value[x] = v; list->top = y)
... and pop looks kind of like it did before, but with an invocation of operate on the end to cause list to shrink (from quadruple its size, for example when you've popped 3 elements off of a list of 4) to no larger than double its size.
#define pop(list) (assert(list && list->top), list->value[--list->top]); \
operate(list, )
Summing it all up, you can see a testcase of the code I've presented here and obtain a modular version of the code here...

C automatic-expandable array of pointers

QUESTION ANSWERED AT END OF PAGE. FULLY WORKING CODE.
Hello, I would like to do in C what I have asked in the title; however, I don't know how to accomplish it. I have done this in C++ thanks to templates, but now I need to do it à la C. Here is the fully functional C++ code: List.h (simple database)
*I wonder now if with void pointers I can emulate the code. The problem is that I've seen a link stating that void * should be avoided because it can cause more trouble than it can solve.
Basically it is a "smart-array" that stores pointers to the variables themselves.
If I know the size of each pointer and the size of each structure pointed to, simple mallocs and reallocs should do right?
typedef struct
{
void **list;
// internal
int last_item_index;
size_t element_size; // size of each pointer
int elements; // number of currently allocated elements
int total_size; // >= #elements so that we don't have to always call malloc
int tweak_request_size; // each time the list grows we add this # of elements
} List;
// a shot at an addCopy function
// it deepcopies the object you pass in
List_addCopy(List *db, void *ptr_to_new_element)
{
... // grow **list
// alloc and copy new element
db->list[db->last_item_index+1] = malloc(element_size); // WORKS?
// HOW TO COPY THE ELEMENT TO HERE IF IT IS A STRUCTURE FOR INSTANCE???
...
}
or
// a shot at an assign function
// (allocate the elements yourself then pass the pointer to the List)
List_assign(List *db, void *ptr_to_new_element)
{
db->List = realloc(db->List, element_size*(elements+tweak_request_size));
db->List[db->last_item_index+1] = ptr_to_new_element;
}
// Usage example
List db; // our database
struct funky *now = (funky*)malloc(sizeof(funky));
funky->soul = JamesBrown;
List_addCopy(db, funky);
if (list[0]->soul == JamesBrown)
puts("We did It! :D");
If I alloc everything outside and just pass the pointers to the List I guess the only problem is the void **.
Is List_add possible? Only with callbacks that do the alloc of the element and / or copy it?
Is List_assign possible? I don't want to have a lot of work and end up with unreliable software.
Thanks a lot and sorry for the convolution in the writing :p
You can avoid void* with something like this:
#include <stdio.h>
#include <stdlib.h>
/*
 * List(T) generates, for element type T:
 *   - the struct type List_T holding an array of T pointers and a count,
 *   - List_T_New(): allocates an empty list; returns NULL if malloc fails.
 *     `items` starts as NULL and `count` as 0. Release with free().
 *   - List_T_Add() / List_T_Del(): placeholders that only adjust and print
 *     `count`; they do not store or remove anything yet.
 * T must be a single identifier because it is pasted into the generated names.
 */
#define List(T) \
typedef struct { \
    T** items; \
    int count; \
} List_ ## T ;\
\
List_ ## T * List_ ## T ## _New(void) { \
    List_ ## T * list = malloc(sizeof *list); \
    if (list == NULL) \
        return NULL; \
    list->items = NULL; \
    list->count = 0; \
    return list; \
} \
\
void List_ ## T ## _Add(List_ ## T *list, T * data) { \
    (void) data; \
    printf("%d\n", ++list->count); \
} \
void List_ ## T ## _Del(List_ ## T *list, int index) { \
    (void) index; \
    printf("%d\n", --list->count); \
}
/* define just one list per type */
List(int);
List(double);
int main()
{
int a, b, c;
double d, e;
List_int *l1;
List_double *l2;
l1 = List_int_New();
List_int_Add(l1, &a);
List_int_Add(l1, &b);
List_int_Add(l1, &c);
List_int_Del(l1, 0);
List_int_Del(l1, 0);
List_int_Del(l1, 0);
free(l1);
l2 = List_double_New();
List_double_Add(l2, &d);
List_double_Add(l2, &e);
List_double_Del(l2, 0);
List_double_Del(l2, 0);
free(l2);
return 0;
}
That's a poor man's template =)
I've used Trinidad's method since I wasn't sure void ** would work and it's pretty nice xD
It works perfectly, but it is complicated to avoid circular dependencies (including a header in another, which results in "multiple reference" errors) without encumbering the interface too much, so I gave up on that approach, although I've uploaded it to SourceForge too. Then I redid everything, this time with void pointers, and it works perfectly ;) No worrying about including a header twice, etc. It just works.
Btw, here's the link, use it at your liking: List - the smart && generic container
In any doubt use the help forums, when I have time I'll document it, but for now I'm using it for my projects.

Pretty-printing a binary tree in C (and other imperative languages)

(First-time poster and rather new in programming, so be patient, please!)
I'm interested in both an efficient general algorithm for printing formatted binary trees (in a CLI environment) and a C implementation. Here is some code that I wrote myself for fun (this is a much simplified version of the original and part of a larger program supporting many BST operations, but it should compile just fine):
#include <stdbool.h> // C99, boolean type support
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define DATATYPE_IS_DOUBLE
#define NDEBUG // disable assertions
#include <assert.h>
#define WCHARBUF_LINES 20 // def: 20
#define WCHARBUF_COLMS 800 // def: 80 (using a huge number, like 500, is a good idea,
// in order to prevent a buffer overflow :)
#define RECOMMENDED_CONS_WIDTH 150
#define RECOMMENDED_CONS_WIDTHQ "150" // use the same value, quoted
/* Preprocessor directives depending on DATATYPE_IS_* : */
#if defined DATATYPE_IS_INT || defined DATATYPE_IS_LONG
#define DTYPE long int
#define DTYPE_STRING "INTEGER"
#define DTYPE_PRINTF "%*.*ld"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_FLOAT
#define DTYPE float
#define DTYPE_STRING "FLOAT"
#define DTYPE_PRINTF "%*.*f"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_DOUBLE
#define DTYPE double
#define DTYPE_STRING "DOUBLE"
#define DTYPE_PRINTF "%*.*lf"
#undef DATATYPE_IS_CHAR
#elif defined DATATYPE_IS_CHAR
#define DTYPE char
#define DTYPE_STRING "CHARACTER"
#define DTYPE_PRINTF "%*.*c" /* using the "precision" sub-specifier ( .* ) with a */
/* character will produce a harmless compiler warning */
#else
#error "DATATYPE_IS_* preprocessor directive undefined!"
#endif
typedef struct node_struct {
DTYPE data;
struct node_struct *left;
struct node_struct *right;
/* int height; // useful for AVL trees */
} node;
typedef struct {
node *root;
bool IsAVL; // useful for AVL trees
long size;
} tree;
/* Return the data of the right-most node reachable from n, or 0 (cast to
   DTYPE) when n is NULL. */
static inline
DTYPE get_largest(node *n){
    node *cur = n;

    if (cur == NULL)
        return (DTYPE)0;
    while (cur->right != NULL)
        cur = cur->right;
    return cur->data;
}
/* Height of the subtree rooted at ST: -1 for an empty subtree, otherwise one
   more than the taller of the two child subtrees. */
static
int subtreeheight(node *ST){
    int left_h, right_h, taller;

    if (!ST)
        return -1;

    left_h = subtreeheight(ST->left);
    right_h = subtreeheight(ST->right);
    taller = (left_h > right_h) ? left_h : right_h;
    return taller + 1;
}
/*
 * Render the tree T level by level into a fixed 2D wide-character buffer and
 * write that buffer to stdout. Does nothing when T is NULL.
 *
 * Steps: derive a cell width from the value returned by get_largest(), blank
 * the buffer, fill it recursively (one buffer row per tree level, missing
 * children shown as "NULL"), skip leading empty columns, then emit each row
 * with fputwc().
 *
 * The helpers recur_swprintf(), call_recur() and omit_cols() are functions
 * defined inside this function (a GCC extension) and use its local variables
 * directly.
 */
void prettyprint_tree(tree *T){
if (T == NULL) // if T empty, abort
return;
#ifndef DATATYPE_IS_CHAR /* then DTYPE is a numeric type */
/* compute spaces, find width: */
int width, i, j;
DTYPE max = get_largest(T->root);
// number of digits of max, capped at 10 (values below 10, including negatives, give 1)
width = (max < 10) ? 1 :
(max < 100) ? 2 :
(max < 1000) ? 3 :
(max < 10000) ? 4 :
(max < 100000) ? 5 :
(max < 1000000) ? 6 :
(max < 10000000) ? 7 :
(max < 100000000) ? 8 :
(max < 1000000000) ? 9 : 10;
assert (max < 10000000000);
width += 2; // needed for prettier results
#if defined DATATYPE_IS_FLOAT || defined DATATYPE_IS_DOUBLE
width += 2; // because of the decimals! (1 decimal is printed by default...)
#endif // float or double
int spacesafter = width / 2;
int spacesbefore = spacesafter + 1;
//int spacesbefore = ceil(width / 2.0);
#else /* character input */
int i, j, width = 3, spacesbefore = 2, spacesafter = 1;
#endif // #ifndef DATATYPE_IS_CHAR
/* start wchar_t printing, using a 2D character array with swprintf() : */
// Per buffer row: whether something was already placed on it, and the column of the last placement.
struct columninfo{ // auxiliary structure
bool visited;
int col;
};
wchar_t wcharbuf[WCHARBUF_LINES][WCHARBUF_COLMS];
int line=0;
struct columninfo eachline[WCHARBUF_LINES];
for (i=0; i<WCHARBUF_LINES; ++i){ // initialization
for (j=0; j<WCHARBUF_COLMS; ++j)
wcharbuf[i][j] = (wchar_t)' ';
eachline[i].visited = false;
eachline[i].col = 0;
}
int height = subtreeheight(T->root);
// Writes the subtree ST into the buffer. cur_line is the row of the parent; it is
// incremented first, so the root call passes -1. nullstr is the text drawn for a
// missing child.
// NOTE(review): cur_line can reach height+1, but nothing checks it against
// WCHARBUF_LINES before indexing eachline[] and wcharbuf[] - confirm the maximum tree height.
// NOTE(review): every swprintf() call here omits the buffer-size argument of the
// ISO C signature swprintf(wchar_t *, size_t, const wchar_t *, ...); presumably this
// targets a toolchain with the older two-argument form - confirm.
void recur_swprintf(node *ST, int cur_line, const wchar_t *nullstr){ // nested function,
// GCC extension!
// horizontal distance between neighbouring cells on this row; halves with every level
float offset = width * pow(2, height - cur_line);
++cur_line;
if (eachline[cur_line].visited == false) {
// first cell on this row starts half a step in
eachline[cur_line].col = (int) (offset / 2);
eachline[cur_line].visited = true;
}
else{
eachline[cur_line].col += (int) offset;
// Only a message is written here; the writes below still happen at the same column.
if (eachline[cur_line].col + width > WCHARBUF_COLMS)
swprintf(wcharbuf[cur_line], L" BUFFER OVERFLOW DETECTED! ");
}
if (ST == NULL){
// NOTE(review): nullstr is a wchar_t string but is formatted with %s; ISO C wide
// printf expects %ls for that - confirm against the target C library.
swprintf(wcharbuf[cur_line] + eachline[cur_line].col, L"%*.*s", 0, width, nullstr);
if (cur_line <= height){
/* use spaces instead of the nullstr for all the "children" of a NULL node */
recur_swprintf(NULL, cur_line, L" ");
recur_swprintf(NULL, cur_line, L" ");
}
else
return;
}
else{
// children are written before the node itself
recur_swprintf(ST->left, cur_line, nullstr);
recur_swprintf(ST->right, cur_line, nullstr);
swprintf(wcharbuf[cur_line] + eachline[cur_line].col - 1, L"("DTYPE_PRINTF"",
spacesbefore, 1, ST->data);
//swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 1, L")");
swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 2, L")");
}
}
void call_recur(tree *tr){ // nested function, GCC extension! (wraps recur_swprintf())
recur_swprintf(tr->root, -1, L"NULL");
}
call_recur(T);
/* Omit empty columns: */
// Returns the first column, among the first RECOMMENDED_CONS_WIDTH columns, that holds
// a character other than ' ' or '\0' in rows 0..height+1; returns 0 if there is none.
int omit_cols(void){ // nested function, GCC extension!
int col;
for (col=0; col<RECOMMENDED_CONS_WIDTH; ++col)
for (line=0; line <= height+1; ++line)
if (wcharbuf[line][col] != ' ' && wcharbuf[line][col] != '\0')
return col;
return 0;
}
/* Use fputwc to transfer the character array to the screen: */
// start two columns left of the first used column, but never before column 0
j = omit_cols() - 2;
j = (j < 0) ? 0 : j;
for (line=0; line <= height+1; ++line){ // assumes RECOMMENDED_CONS_WIDTH console window!
fputwc('\n', stdout); // optional blank line
for (i=j; i<j+RECOMMENDED_CONS_WIDTH && i<WCHARBUF_COLMS; ++i)
fputwc(wcharbuf[line][i], stdout);
fputwc('\n', stdout);
}
}
(also uploaded to a pastebin service, in order to preserve syntax highlighting)
It works quite well, although the automatic width setting could be better. The preprocessor magic is a bit silly (or even ugly) and not really related to the algorithm, but it allows using various data types in the tree nodes (I saw it as a chance to experiment a bit with the preprocessor - remember, I am a newbie!).
The main program is supposed to call
system("mode con:cols="RECOMMENDED_CONS_WIDTHQ" lines=2000");
before calling prettyprint_tree(), when running inside cmd.exe .
Sample output:
(106.0)
(102.0) (109.0)
(101.5) NULL (107.0) (115.0)
NULL NULL (106.1) NULL (113.0) NULL
NULL NULL NULL NULL
Ideally, the output would be like this (the reason I'm using the wprintf() family of functions is being able to print Unicode characters anyway):
(107.0)
┌─────┴─────┐
(106.1) NULL
┌───┴───┐
NULL NULL
So, my questions:
What do you think about this code? (Coding style suggestions are also very welcome!)
Can it be extended in an elegant way in order to include the line-drawing characters? (Unfortunately, I don't think so.)
Any other algorithms in C or other imperative languages (or imperative pseudo-code)?
Somewhat unrelated: What's your opinion about nested functions (non-portable GNU extension)? I think it's an elegant way to write recursive parts of a function without having to provide all the local variables as arguments (and also useful as an implementation-hiding technique), but it could be my Pascal past :-) I'm interested in the opinion of more experienced coders.
Thank you in advance for your responses!
PS. The question is not a duplicate of this one.
edit:
Jonathan Leffler wrote an excellent answer that will most probably become the "accepted answer" after a few days (unless someone posts something equally awesome!). I decided to respond here instead of commenting because of the space constraints.
The code above is actually part of a larger "homework" project (implementing BST operations in a shared library + a CLI app using that library). However, the "prettyprint" function was not part of the requirements; just something I decided to add myself.
I also added a "convert to AVL without rotations" function, that used "arraystr" as an intermediate representation ;-) I forgot that it wasn't used here. I've edited the code to remove it. Also, the bool IsAVL struct member is anything but unused; just not used in this particular function. I had to copy/paste code from various files and make a lot of changes in order to present the code cited above. That's a problem that I don't know how to solve. I would gladly post the whole program, but it is too large and commented in my mother-tongue (not in English!).
The whole project was about 1600 LOC (including comments) with multiple build targets (debug/release/static-linking) and it compiled cleanly with -Wall and -Wextra enabled. Assertions and debug messages were enabled/disabled automatically depending on the build target. Also I thought that function prototypes weren't needed for nested functions, after all nested functions do not implement any external interface by definition - GCC certainly didn't complain here. I don't know why there are so many warnings on OSX :(
I'm using GCC 4.4.1 on Windows 7.
Despite writing and testing this program on Windows, I am actually a Linux user... Still, I can't stand vim and I use nano (inside GNU screen) or gedit instead (shoot me)! In any case, I prefer the K&R brace style :)
Portability doesn't really matter, for Linux users GCC is pretty much de facto... The fact that it also works well under Windows is a nice bonus.
I'm not using a VCS, perhaps I should. I want to try, but all of them seem too complex for my needs and I don't know how to choose one :-)
You are definitely right about checking for depth overflow, thankfully it is very easy to add.
Thanks for the L' ' advice!
I find your suggestion (encapsulating "the whole of the drawing code so that the screen image and related information is in a single structure") extremely interesting... but I don't really understand what you mean by "encapsulation". Could you please provide 3 or 4 lines of (pseudo)code showing a possible function declaration and/or a possible function call?
This is my first "large-ish" (and non-trivial) program and I'm really thankful for your advice.
edit #2:
Here is an implementation of the "quick and dirty" method mentioned here.
(edit #3: I decided to split it to a separate answer, since it is a valid answer to the OP.)
Many responses mentioned Graphviz. I already knew about it (many Linux apps are linked against it) but I thought it would be overkill for a 10KB CLI executable. However, I'll keep it in mind for the future. It seems great.
You need to decide on whether your code needs to be portable. If you might ever need to use a compiler other than GCC, the nested functions are lethal to your portability goal. I would not use them - but my portability goals may not be the same as yours.
Your code is missing <wchar.h>; it compiles fairly cleanly without it (GCC complained about missing prototypes for your non-static functions and for swprintf() and fputwc()), but adding <wchar.h> generates a lot of serious warnings related to swprintf(); they are actually diagnosing a bug.
gcc -O -I/Users/jleffler/inc -std=c99 -Wall -Wextra -Wmissing-prototypes \
-Wstrict-prototypes -Wold-style-definition -c tree.c
tree.c:88:6: warning: no previous prototype for ‘prettyprint_tree’
tree.c: In function ‘prettyprint_tree’:
tree.c:143:10: warning: no previous prototype for ‘recur_swprintf’
tree.c: In function ‘recur_swprintf’:
tree.c:156:17: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:156:17: error: too few arguments to function ‘swprintf’
/usr/include/wchar.h:135:5: note: declared here
tree.c:160:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:174:22: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:174:22: warning: passing argument 3 of ‘swprintf’ makes pointer from integer without a cast
/usr/include/wchar.h:135:5: note: expected ‘const wchar_t * restrict’ but argument is of type ‘int’
tree.c:177:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
/usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
tree.c:177:13: error: too few arguments to function ‘swprintf’
/usr/include/wchar.h:135:5: note: declared here
tree.c: In function ‘prettyprint_tree’:
tree.c:181:10: warning: no previous prototype for ‘call_recur’
tree.c:188:9: warning: no previous prototype for ‘omit_cols’
(This is GCC 4.5.2 on MacOS X 10.6.5.)
Do look up the interface to swprintf(); it is more like snprintf() than sprintf() (which is A Good Thing™!).
The overall idea is interesting. I suggest choosing one representation when submitting your code for analysis, and cleaning up anything that is not relevant to the code analysis. For example, the arraystr type is defined but unused - you don't want to let people like me get cheap shots at your code. Similarly with the unused structure members; don't even leave them as comments, even if you might want to keep them in the code in your VCS (though why?). You are using a version control system (VCS), aren't you? And that's a rhetorical question - if you aren't using a VCS, start using one now, before you lose something you value.
Design-wise, you want to avoid doing things like requiring the main program to run an obscure system() command - your code should take care of such issues (maybe with an initializer function, and perhaps a finalizer function to undo the changes made to the terminal settings).
One more reason not to like nested functions: I can't work out how to get a declaration of the function in place. What seemed like plausible alternatives did not work - but I didn't go and read the GCC manual on them.
You check for column-width overflow; you do not check for depth overflow. Your code will crash and burn if you create a tree that is too deep.
Minor nit: you can tell people who do not use 'vi' or 'vim' to edit - they don't put the opening brace of a function in column 1. In 'vi', the opening brace in column 1 gives you an easy way to the start of a function from anywhere inside it ('[[' to jump backwards; ']]' to jump to the start of the next function).
Don't disable assertions.
Do include a main program and the relevant test data - it means people can test your code, instead of just compiling it.
Use wide-character constants instead of casts:
wcharbuf[i][j] = (wchar_t)' ';
wcharbuf[i][j] = L' ';
Your code creates a big screen image (20 lines x 800 columns in the code) and fills in the data to be printed. That's a reasonable way to do it. With care, you could arrange to handle the line-drawing characters. However, I think you would need to rethink the core drawing algorithms. You would probably want to encapsulate the whole of the drawing code so that the screen image and related information is in a single structure, which can be passed by reference (pointer) to functions. You'd have a set of functions to draw various bits at positions your tree-searching code designates. You would have a function to draw the data value at an appropriate position; you would have a function to draw lines at appropriate positions. You would probably not have nested functions - it is, to my eyes, far harder to read the code when there's a function nested inside another. Making functions static is good; make the nested functions into static (non-nested) functions. Give them the context they need - hence the encapsulation of the screen image.
Overall a good start; lots of good ideas. Lots still to do.
Request for information on encapsulation...
You could use a structure such as:
typedef struct columninfo Colinfo;
typedef struct Image
{
wchar_t image[WCHARBUF_LINES][WCHARBUF_COLUMNS];
Colinfo eachline[WCHARBUF_LINES];
} Image;
Image image;
You might find it convenient and/or sensible to add some extra members; that would show up during the implementation. You might then create a function:
/* Outline only: the body is a placeholder ("...").
 * Presumably meant to draw `value` into `image` at the given line and column;
 * the actual behaviour is left to the implementation. */
void format_node(Image *image, int line, int column, DTYPE value)
{
...
}
You could also make some of the constants, such as spacesafter, into enum values:
enum { spacesafter = 2 }; /* integer constant, usable by every function that can see this declaration */
These can then be used by any of the functions.
Coding style: The prettyprint_tree() function juggles too much computation and data to be comfortable to read. Initialization and printing of the image buffer can for example be placed in separate functions and the width computation also. I am sure you can write a formula with log to replace the
width = (max < 10) ? 1 :
(max < 100) ? 2 :
(max < 1000) ? 3 :
...
computation.
I am not used to reading nested functions in C, which makes it much harder for me to scan your code. Unless you don't share your code with others or have ideological reasons for tying the code to GCC, I wouldn't use those extensions.
Algorithm: For a quick and dirty pretty-printer, written in C, I would never use your style of layout. In comparison to your algorithm, it is a no-brainer to write an in-order traversal to print
a
/ \
b c
as
c
a
b
and I don't mind having to tilt my head. For anything prettier than that I would much rather emit
digraph g { a -> b; a -> c; }
and leave it to dot to do the formatting.
This code should work; it's from: http://www.ihas1337code.com/2010/09/how-to-pretty-print-binary-tree.html
#include <fstream>
#include <iostream>
#include <deque>
#include <iomanip>
#include <sstream>
#include <string>
#include <cmath>
using namespace std;
// A binary tree node holding an int. Both children start out as NULL.
struct BinaryTree {
    BinaryTree *left;   // left child, or NULL
    BinaryTree *right;  // right child, or NULL
    int data;           // value stored in this node

    // Creates a leaf node that stores `val`.
    BinaryTree(int val)
        : left(NULL),
          right(NULL),
          data(val)
    {
    }
};
// Returns the number of nodes on the longest path from p down to a leaf;
// an empty tree (p == NULL) has height 0.
int maxHeight(BinaryTree *p) {
    if (p == NULL) {
        return 0;
    }
    const int viaLeft = maxHeight(p->left);
    const int viaRight = maxHeight(p->right);
    int deepest = viaRight;
    if (viaLeft > viaRight) {
        deepest = viaLeft;
    }
    return deepest + 1;
}
// Returns the text that stream insertion (operator<<) produces for `val`.
string intToString(int val) {
    ostringstream buffer;
    buffer << val;
    const string text = buffer.str();
    return text;
}
// Prints one line of branch arms (eg, / \ ) for the nodes held in nodesQueue.
// The queue entries are consumed in pairs: a "/" is printed for a non-NULL
// first entry and a "\" for a non-NULL second entry; a NULL entry yields a
// space instead. The line always ends with endl.
void printBranches(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
    deque<BinaryTree*>::const_iterator cursor = nodesQueue.begin();
    const int pairCount = nodesInThisLevel / 2;

    for (int pairIndex = 0; pairIndex < pairCount; ++pairIndex) {
        // The first pair is indented from the margin, later ones from the previous pair.
        const int leadWidth = (pairIndex == 0) ? startLen - 1 : nodeSpaceLen - 2;

        BinaryTree *leftNode = *cursor;
        ++cursor;
        BinaryTree *rightNode = *cursor;
        ++cursor;

        out << setw(leadWidth) << "" << (leftNode ? "/" : " ");
        out << setw(2 * branchLen + 2) << "" << (rightNode ? "\\" : " ");
    }
    out << endl;
}
// Prints one line of node values with their horizontal branches (eg, ___10___ ).
// For each of the first nodesInThisLevel queue entries: underscores are drawn
// on the left only when the node has a left child and on the right only when
// it has a right child; a NULL entry prints as blank space. The fill character
// is reset to ' ' after every entry, and the line ends with endl.
void printNodes(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
    deque<BinaryTree*>::const_iterator cursor = nodesQueue.begin();
    int slot = 0;

    while (slot < nodesInThisLevel) {
        const BinaryTree *node = *cursor;
        const bool hasLeft = node && node->left;
        const bool hasRight = node && node->right;

        string label;
        if (node) {
            label = intToString(node->data);
        }

        // Gap in front of the entry.
        out << setw(slot == 0 ? startLen : nodeSpaceLen) << "";
        // Left arm followed by the right-aligned value.
        out << setfill(hasLeft ? '_' : ' ') << setw(branchLen + 2) << label;
        // Right arm.
        out << setfill(hasRight ? '_' : ' ') << setw(branchLen) << "";
        out << setfill(' ');

        ++slot;
        ++cursor;
    }
    out << endl;
}
// Prints the bottom row: the values of the first nodesInThisLevel queue
// entries, right-aligned in fixed-width fields (NULL entries stay blank),
// followed by endl.
void printLeaves(int indentSpace, int level, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
    const int firstFieldWidth = indentSpace + 2;
    const int otherFieldWidth = 2 * level + 2;

    deque<BinaryTree*>::const_iterator cursor = nodesQueue.begin();
    for (int slot = 0; slot < nodesInThisLevel; ++slot, ++cursor) {
        const BinaryTree *leaf = *cursor;

        string label;
        if (leaf) {
            label = intToString(leaf->data);
        }
        out << setw(slot == 0 ? firstFieldWidth : otherFieldWidth) << label;
    }
    out << endl;
}
// Pretty-prints a binary tree, level by level, to the output stream.
// Parameters:
//   root         root of the tree to print
//   level        controls how widely the tree is spread out (eg, level 1 has
//                the minimum space between nodes, level 2 a larger space)
//   indentSpace  extra indentation on the left (eg, 0 makes the lowest level
//                of the left node stick to the left margin)
//   out          stream that receives the drawing
void printPretty(BinaryTree *root, int level, int indentSpace, ostream& out) {
    const int h = maxHeight(root);
    const int fullSpan = (int)pow(2.0, h);
    const int halfSpan = (int)pow(2.0, h - 1);

    // Length of the branch drawn for each node of the current level.
    int branchLen = 2 * (fullSpan - 1) - (3 - level) * halfSpan;
    // Distance between the left neighbor's right arm and the right neighbor's left arm.
    int nodeSpaceLen = 2 + (level + 1) * fullSpan;
    // Leading space in front of the leftmost node of the current level.
    int startLen = branchLen + (3 - level) + indentSpace;

    int nodesInThisLevel = 1;
    deque<BinaryTree*> nodesQueue;
    nodesQueue.push_back(root);

    for (int row = 1; row < h; ++row) {
        printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);

        branchLen = branchLen / 2 - 1;
        nodeSpaceLen = nodeSpaceLen / 2 + 1;
        startLen = branchLen + (3 - level) + indentSpace;
        printNodes(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);

        // Replace every entry of this level by its two children. A missing
        // node contributes two NULL placeholders, so each level stays complete.
        for (int remaining = nodesInThisLevel; remaining > 0; --remaining) {
            BinaryTree *parent = nodesQueue.front();
            nodesQueue.pop_front();

            BinaryTree *leftChild = NULL;
            BinaryTree *rightChild = NULL;
            if (parent) {
                leftChild = parent->left;
                rightChild = parent->right;
            }
            nodesQueue.push_back(leftChild);
            nodesQueue.push_back(rightChild);
        }
        nodesInThisLevel *= 2;
    }

    printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);
    printLeaves(indentSpace, level, nodesInThisLevel, nodesQueue, out);
}
int main() {
BinaryTree *root = new BinaryTree(30);
root->left = new BinaryTree(20);
root->right = new BinaryTree(40);
root->left->left = new BinaryTree(10);
root->left->right = new BinaryTree(25);
root->right->left = new BinaryTree(35);
root->right->right = new BinaryTree(50);
root->left->left->left = new BinaryTree(5);
root->left->left->right = new BinaryTree(15);
root->left->right->right = new BinaryTree(28);
root->right->right->left = new BinaryTree(41);
cout << "Tree pretty print with level=1 and indentSpace=0\n\n";
// Output to console
printPretty(root, 1, 0, cout);
cout << "\n\nTree pretty print with level=5 and indentSpace=3,\noutput to file \"tree_pretty.txt\".\n\n";
// Create a file and output to that file
ofstream fout("tree_pretty.txt");
// Now print a tree that's more spread out to the file
printPretty(root, 5, 0, fout);
return 0;
}
Maybe you can take a look at Bresenham's line algorithm; it could be suitable for you.
Here is a C implementation of the "quick and dirty" method mentioned here. It doesn't get much quicker and/or dirtier:
void shittyprint_tree(tree *T){ // Supposed to be quick'n'dirty!
// When DTYPE is "char", width is a bit larger than needed.
if (T == NULL)
return;
const int width = ceil(log10(get_largest(T->root)+0.01)) + 2;
const wchar_t* sp64 = L" ";
void nested(node *ST, int spaces){ // GCC extension
if (ST == NULL){
wprintf(L"\n"); // Can be commented to disable the extra blanc line.
return;
}
nested(ST->right, spaces + width);
wprintf(L"%*.*s("DTYPE_PRINTF")\n", 0, spaces, sp64, 1, 1, ST->data);
nested(ST->left, spaces + width);
}
nested(T->root, 2);
}
Sample output (using the same tree as before):
(115.0)
(113.0)
(109.0)
(107.0)
(106.1)
(106.0)
(102.0)
(101.5)
I can't say, though, that it fits my original requirements...

Resources