Related
I need to store a 4 bytes number in a flash memory. I can't erase/write the number every time, because the flash memories have a limited number of write/erase cycles. To avoid breaking the flash too fast, I've reserved a full page just for this counter. On each write, I write the number on the next four bytes. Once the full page is written, I erase the page and I start writing from the first address.
To implement this, I need the following functionalities:
Find the next free address to write(frist address with a value of 0xFFFFFFFF)
Find the last written address(the last address with a value different from 0xFFFFFFFF)
The number will never have a value of 0xFFFFFFFF.
First I tried by just iterating 4+4+4.. and it all worked fine. Unfortunately, it turns out that the reading from the flash is too slow. I need faster algorithm. For my case, the binary search will be perfect.
This is what I managed to do so far:
u32 FindNextFreeAddress(u32* arr, s32 l, s32 r, u32* address)
{
if (!arr || !address) {
return 1;
}
while (l <= r) {
s32 m = l + ((r - l) / 2);
if (arr[m] == 0xFFFFFFFF) {//when is found?
if (m && (arr[m-1] != 0xFFFFFFFF)) {
*address = (u32)&arr[m];
return 0;
}
r = m - 1;
} else {
l = m + 1;
}
}
return 1;
}
I am having difficulties figuring out when the 0xFFFFFFFF is actually found. I see a problem if the whole memory is erased, meaning only 0xFF. The first address will not be found.
Also I haven't done anything for the other one
u32 FindLastOccupiedAddress(u32* arr, s32 l, s32 r, u32* address)
I guess I have to somehow reverse FindNextFreeAddress, but I'm still figuring it out.
This check:
if (m && (arr[m-1] != 0xFFFFFFFF)) {
Will always be false when m is 0, i.e. on the first element of the list. So you want the check to pass if you're on the first element or if the previous element is not the flag value:
if (!m || (arr[m-1] != 0xFFFFFFFF)) {
Perhaps a slightly different approach.
At each iteration, divide n by 2.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
const uint32_t* FindNextFreeAddress(const uint32_t *arr, size_t n) {
const uint32_t *end = NULL;
size_t i = 0;
while (n > 0) {
size_t mid = n / 2;
if (arr[i + mid] == 0xFFFFFFFF) {
n = mid;
end = &arr[i + mid];
} else {
i += mid + 1;
n = n - mid - 1;
}
}
return end;
}
void test(const uint32_t *arr, size_t n) {
const uint32_t *p = FindNextFreeAddress(arr, n);
if (p) {
size_t index = (size_t) (p - arr);
printf("Offset %zu, last written address contents: %lu\n", //
index, (unsigned long) (index > 0 ? arr[index - 1] : 0));
} else {
printf("Empty\n");
}
}
int main() {
uint32_t a[4] = {123, 456, 789, 012};
test(a, 4);
uint32_t b[4] = {123, 456, 789, -1u};
test(b, 4);
uint32_t c[4] = {123, -1u, -1u, -1u};
test(c, 4);
uint32_t d[4] = {-1u, -1u, -1u, -1u};
test(d, 4);
puts("");
uint32_t e[5] = {123, 456, 789, 12, 345};
test(e, 5);
uint32_t f[3] = {123, 456, 789};
test(f, 3);
uint32_t g[5] = {-1u, -1u, -1u, -1u, -1u};
test(g, 5);
uint32_t h[3] = {-1u, -1u, -1u};
test(h, 3);
puts("");
uint32_t i[5] = {123, 456, -1u, -1u, -1u};
test(i, 5);
uint32_t j[3] = {123, 456, -1u};
test(j, 3);
}
Output
Empty
Offset 3, last written address contents: 789
Offset 1, last written address contents: 123
Offset 0, last written address contents: 0
Empty
Empty
Offset 0, last written address contents: 0
Offset 0, last written address contents: 0
Offset 2, last written address contents: 456
Offset 2, last written address contents: 456
When FindNextFreeAddress(arr, n) returns NULL, the last written address is &arr[n-1].
When FindNextFreeAddress(arr, n) returns a, the last written address is NULL.
When FindNextFreeAddress(arr, n) returns more than a, the last written address is a-1.
As it is emebedded target (FLASH memory) and you probably use gcc.
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <string.h>
u32 FindLastOccupiedAddress(u32 start, u32 length)
{
return (u32)memmem((void *)start, length, (u32[]){0xffffffff}, sizeof(0xffffffff)) - 1;
}
start start address if the area in flash
length length of that area
Here is my test code to find 1st clipping area on the screen.
Two subroutines and dummy loops in the code to compare the performance of them.
point_in_neon (NEON version) and point_in (Regular version) does the same thing:
find out the first clipping area (contains given point) in given list and return -1 if there is no matching area.
I expected NEON version is faster than regular version.
Unfortunately, it is slower than regular version. Is there another way to speed it up?
The compiler command is:
${CC} -O2 -ftree-vectorize -o vcomp vcomp.c
Thanks,
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <sys/time.h>
#include <arm_neon.h>
#define WIDTH (4096)
#define HEIGHT (4096)
#define CLIPS (32)
static inline uint64_t now(void) {
struct timeval tv;
gettimeofday(&tv,NULL);
return tv.tv_sec*1000000+tv.tv_usec;
}
typedef struct _rect_t {
int32_t x;
int32_t y;
uint32_t width;
uint32_t height;
} rect_t;
typedef struct _point_t {
int32_t x;
int32_t y;
} point_t;
int32_t inline point_in_neon(const point_t *pt, const rect_t rs[4]) {
const int32_t right[4]={
rs[0].x+rs[0].width-1,
rs[1].x+rs[1].width-1,
rs[2].x+rs[2].width-1,
rs[3].x+rs[3].width-1
}, bottom[4]={
rs[0].y+rs[0].height-1,
rs[1].y+rs[1].height-1,
rs[2].y+rs[2].height-1,
rs[3].y+rs[3].height-1
};
int32x4_t p, r;
uint32x4_t t;
uint32_t res[4];
//p = <Xp, Xp, Xp, Xp>
p=vld1q_dup_s32(&pt->x);
//r = <Left0, Left1, Left2, Left3>
r=vld1q_lane_s32(&rs[0].x, r, 0);
r=vld1q_lane_s32(&rs[1].x, r, 1);
r=vld1q_lane_s32(&rs[2].x, r, 2);
r=vld1q_lane_s32(&rs[3].x, r, 3);
//t = (p >= r)
t=vcgeq_s32(p, r);
//r = <Right0, Right1, Right2, Right3>
r=vld1q_s32(&right);
//t = t & (r >= p)
t=vandq_u32(t, vcgeq_s32(r, p));
//p = <Yp, Yp, Yp, Yp>
p=vld1q_dup_s32(&pt->y);
//r = <Top0, Top1, Top2, Top3>
r=vld1q_lane_s32(&rs[0].y, r, 0);
r=vld1q_lane_s32(&rs[1].y, r, 1);
r=vld1q_lane_s32(&rs[2].y, r, 2);
r=vld1q_lane_s32(&rs[3].y, r, 3);
//t = t & (p >= r)
t=vandq_u32(t, vcgeq_s32(p, r));
//r = <Bottom0, Bottom1, Bottom2, Bottom3>
r=vld1q_s32(&bottom);
//t = t & (r >= p)
t=vandq_u32(t, vcgeq_s32(r, p));
vst1q_u32(res, t);
if(res[0])
return 0;
else if(res[1])
return 1;
else if(res[2])
return 2;
else if(res[3])
return 3;
return -1;
}
int32_t inline point_in(const point_t *pt, const rect_t *rs, uint32_t len) {
int32_t i;
for(i=0;i<len;i++) {
int32_t right=rs[i].x+rs[i].width-1,
bottom=rs[i].y+rs[i].height-1;
if(pt->x>=rs[i].x && pt->x<=right &&
pt->y>=rs[i].y && pt->y<=bottom)
return i;
}
return -1;
}
int32_t main(int32_t argc, char *argv[]) {
rect_t rs[CLIPS];
int32_t i, j;
uint64_t ts0, ts1;
int32_t res[2][CLIPS];
srand((unsigned int)time(NULL));
for(i=0;i<CLIPS;i++) {
rs[i].x=rand()%WIDTH;
rs[i].y=rand()%HEIGHT;
rs[i].width=rand()%WIDTH;
rs[i].height=rand()%HEIGHT;
}
memset(res, 0, sizeof(res));
ts0=now();
for(i=0;i<HEIGHT;i++) {
for(j=0;j<WIDTH;j++) {
point_t p={i, j};
int32_t idx=point_in(&p, rs, CLIPS);
if(idx>=0)
res[0][idx]=1;
}
}
ts0=now()-ts0;
ts1=now();
for(i=0;i<HEIGHT;i++) {
for(j=0;j<WIDTH;j++) {
int32_t k, idx;
point_t p={i, j};
for(k=0, idx=-1;k<CLIPS/4;k++) {
idx=point_in_neon(&p, &rs[k*4]);
if(idx>=0)
break;
}
if(idx>=0)
res[1][k*4+idx]=1;
}
}
ts1=now()-ts1;
/*
for(i=0;i<CLIPS;i++) {
if(res[0][i]!=res[1][i]) {
printf("error.\n");
return 1;
}
}
*/
printf("regular = %lu\n", ts0);
printf("neon = %lu\n", ts1);
return 0;
}
According to Peter Cordes's suggestion, I replaced data loding parts of point_in_neon subroutine with vld4q_s32 intrinsic and subsequent right and bottom calculation can be vectorized. Now the code is shorter and faster than regular version.
int32_t inline point_in_neon(const point_t *pt, const rect_t rs[4]) {
int32x4x4_t r;
int32x4_t right, bottom, p;
uint32x4_t t;
uint32_t res[4];
/*
r.val[0] = <X0, X1, X2, X3>
r.val[1] = <Y0, Y1, Y2, Y3>
r.val[2] = <Width0, Width1, Width2, Width3>
r.val[3] = <Height0, Height1, Height2, Height3>
*/
r=vld4q_s32(rs);
//right = <Right0, Right1, Right2, Right3>
right=vsubq_s32(vaddq_s32(r.val[0], r.val[2]), vdupq_n_s32(1));
//bottom = <Bottom0, Bottom1, Bottom2, Bottom3>
bottom=vsubq_s32(vaddq_s32(r.val[1], r.val[3]), vdupq_n_s32(1));
//p = <Xp, Xp, Xp, Xp>
p=vld1q_dup_s32(&pt->x);
//t = (p >= left)
t=vcgeq_s32(p, r.val[0]);
//t = t & (right >= p)
t=vandq_u32(t, vcgeq_s32(right, p));
//p = <Yp, Yp, Yp, Yp>
p=vld1q_dup_s32(&pt->y);
//t = t & (p >= top)
t=vandq_u32(t, vcgeq_s32(p, r.val[1]));
//t = t & (r >= bottom)
t=vandq_u32(t, vcgeq_s32(bottom, p));
vst1q_u32(res, t);
if(res[0])
return 0;
else if(res[1])
return 1;
else if(res[2])
return 2;
else if(res[3])
return 3;
return -1;
}
Starting with your original point_in method, we can clean up a little bit here by removing the -1's, and changing <= to <.
int32_t inline point_in(const point_t *pt, const rect_t *rs, uint32_t len) {
int32_t i;
for(i=0; i < len; i++)
{
// this is pointless - change your data structures so that
// the rect stores minx/maxx, miny/maxy instead!
int32_t right = rs[i].x + rs[i].width;
int32_t bottom= rs[i].y + rs[i].height;
bool cmp0 = pt->x >= rs[i].x;
bool cmp1 = pt->y >= rs[i].y;
bool cmp2 = pt->x < right;
bool cmp3 = pt->y < bottom;
if(cmp0 & cmp1 & cmp2 & cmp3)
return i;
}
return -1;
}
Next obvious thing to point out:
// your screen size...
#define WIDTH (4096)
#define HEIGHT (4096)
// yet your structures use uint32 as storage???
typedef struct _rect_t {
int32_t x;
int32_t y;
uint32_t width;
uint32_t height;
} rect_t;
typedef struct _point_t {
int32_t x;
int32_t y;
} point_t;
If you can get away with using 16bit integers, this will go at twice the speed (because you can fit 8x 16bit numbers in a SIMD register, v.s. 4x 32bit). Whilst we're at it, we might as well change the data layout to structure of array at the same time.
I'm also going to hoist the pointless p.x + width out, and store it as xmax/ymax instead (removes duplicated computation in your loops).
typedef struct rect_x8_t {
int16x8_t x;
int16x8_t y;
int16x8_t xmax; //< x + width
int16x8_t ymax; //< y + height
} rect_x8_t;
typedef struct point_x8_t {
int16x8_t x;
int16x8_t y;
} point_x8_t;
On the assumption you don't have a number of clips that's divisible by 8, we'll need to pad the number slightly (not a big deal)
// assuming this has already been initialised
rect_t rs[CLIPS];
// how many batches of 8 do we need?
uint32_t CLIPS8 = (CLIPS / 8) + (CLIPS & 7 ? 1 : 0);
// allocate in batches of 8
rect_x8_t rs8[CLIPS8] = {};
// I'm going to do this rubbishly as an pre-process step.
// I don't care too much about efficiency here...
for(uint32_t i = 0; i < CLIPS; ++i) {
rs8[i / 8].x[i & 7] = rs[i].x;
rs8[i / 8].y[i & 7] = rs[I].y;
rs8[i / 8].xmax[i & 7] = rs[i].x + rs[i].width;
rs8[i / 8].ymax[i & 7] = rs[i].y + rs[i].height;
}
I have a couple of concerns here:
for(i=0;i<HEIGHT;i++) {
for(j=0;j<WIDTH;j++) {
// This seems wrong? Shouldn't it be p = {j, i} ?
point_t p={i, j};
int32_t idx=point_in(&p, rs, CLIPS);
// I'm not quite sure what the result says about your
// image data and clip regions???
//
// This seems like a really silly way of asking
// a simple question about the clip regions. The pixels
// don't have any effect here.
if(idx >= 0)
res[0][idx] = 1;
}
}
Anyhow, now refactoring the point_in method to use int16x8_t, we get:
inline int32_t point_in_x8(const point_x8_t pt,
const rect_x8_t* rs,
uint32_t len) {
for(int32_t i = 0; i < len; i++) {
// perform comparisons on 8 rects at a time
uint16x8_t cmp0 = vcgeq_s16(pt.x, rs[i].x);
uint16x8_t cmp1 = vcgeq_s16(pt.y, rs[i].y);
uint16x8_t cmp2 = vcltq_s16(pt.x, rs[i].xmax);
uint16x8_t cmp3 = vcltq_s16(pt.y, rs[I].ymax);
// combine to single comparison value
uint16x8_t cmp01 = vandq_u16(cmp0, cmp1);
uint16x8_t cmp23 = vandq_u16(cmp2, cmp3);
uint16x8_t cmp0123 = vandq_u16(cmp01, cmp23);
// use a horizontal max to see if any lanes are true
if(vmaxvq_u16(cmp0123)) {
for(int32_t j = 0; j < 8; ++j) {
if(cmp0123[j])
return 8*i + j;
}
}
}
return -1;
}
Any additional padded elements in the rect_x8_t structs should end up being ignored (since they should be 0/0, 0/0, which will always end up being false).
Then finally...
for(i = 0; i < HEIGHT; i++) {
point_x8_t p;
// splat the y value
p.y = vld1q_dup_s16(i);
for(j = 0; j < WIDTH; j++) {
// splat the x value
p.x = vld1q_dup_s16(j);
int32_t idx = point_in_x8(p, rs8, CLIPS8);
if(idx >= 0)
res[1][idx] = 1;
}
}
The vld4 instruction actually has a fairly high latency. Given that WIDTH * HEIGHT is actually a very big number, pre-swizzling here (as a pre-processing step) makes a lot more sense imho.
HOWEVER
This whole algorithm could be massively improved by simply ignoring the pixels, and working on CLIP regions directly.
A clip region will be false if it is entirely contained by the preceding clip regions
for(i = 0; i < CLIPS; i++) {
// if region is empty, ignore.
if(rs[i].width == 0 || rs[i].height == 0) {
res[0][i] = 0;
continue;
}
// first region will always be true (unless it's of zero size)
if(i == 0) {
res[0][1] = 1;
continue;
}
uint32_t how_many_intersect = 0;
bool entirely_contained = false;
uint32_t intersection_indices[CLIPS] = {};
// do a lazy test first.
for(j = i - 1; j >= 0; --j) {
// if the last region is entirely contained by preceding
// ones, it will be false. exit loop.
if(region_is_entirely_contained(rs[i], rs[j])) {
res[0][i] = 0;
entirely_contained = true;
j = -1; ///< break out of loop
}
else
// do the regions intersect?
if(region_intersects(rs[i], rs[j])) {
intersection_indices[how_many_intersect] = j;
++how_many_intersect;
}
}
// if one region entirely contains this clip region, skip it.
if(entirely_contained) {
continue;
}
// if you only intersect one or no regions, the result is true.
if(how_many_intersect <= 1) {
res[0][i] = 1;
continue;
}
// If you get here, the result is *probably* true, however
// you will need to split this clip region against the previous
// ones to be fully sure. If all regions are fully contained,
// the answer is false.
// I won't implement it, but something like this:
* split rs[i] against each rs[intersection_indices[]].
* Throw away the rectangles that are entirely contained.
* Each bit that remains should be tested against each rs[intersection_indices[]]
* If you find any split rectangle that isn't contained,
set to true and move on.
}
I've this sample code :
map *d;
i = MAP_SIZE;
j = sizeof(map);
d = malloc(MAP_SIZE);
if (d == NULL) {
exit(EXIT_FAILURE);
}
dest.x = dest.y = 0;
for (i = 0; i < WINDOW_HEIGHT / AREA_RESOLUTION; i++)
{
for (j = 0; j < WINDOW_WIDTH / AREA_RESOLUTION; j++)
{
k = GetAreaPos(j, i);
Area = d[k];
dest.x = j*AREA_RESOLUTION;
dest.y = i*AREA_RESOLUTION;
if (Area->landType == DESTRUCTIBLE_BRICK) { //GOT ERROR HERE
SDL_QueryTexture(Game_Texture->Explodable, NULL, NULL, &dest.w, &dest.h);
SDL_RenderCopy(Renderer, Game_Texture->Explodable, NULL, &dest);
}
if (Area->landType == INDESTRUCTIBLE_BRICK) {
SDL_QueryTexture(Game_Texture->Solidblock, NULL, NULL, &dest.w, &dest.h);
SDL_RenderCopy(Renderer, Game_Texture->Solidblock, NULL, &dest);
}
}
}
free(d);
MAP_SIZE = sizeof(map)
I use Visual Studio, when i run debug without breakpoints, I've always got a memory access violation in the first loop turns.
with breakpoints and slowly/constant F5 push, no errors, the loop finnish as well...
I don't understand why this error appears, the 2 loops havent 100 turns in this example, the violation is totally random, sometimes in the 5th turn, sometimes in the 90th...
This sample of code is running in another external loop, and when the first execution works fine, the others never has any violation error.
UPDATE 1
Now, i use as well my d variable and exit if malloc() return NULL. The GetAreaPos() return correct value, but the memory violation always appaers on the first if condition at randomly between 2nd and 6th turn of second FOR loop in tests
Header definitions :
#define MAP_SIZE sizeof(map)
#define AREA_SIZE sizeof(union area)
#define AREA_RESOLUTION 64
#define MAP_WIDTH 10//28
#define MAP_HEIGHT 10//14
//#pragma pack(1)
typedef enum {
EMPTY = 00,
INDESTRUCTIBLE_BRICK = 10,
DESTRUCTIBLE_BRICK = 11
} landType;
typedef enum {
BONUS_BOMB_SCOPE = 000,
MALUS_BOMB_SCOPE = 001,
BONUS_BOMB_AMOUNT = 010,
MALUS_BOMB_AMOUNT = 011,
BONUS_PLAYER_SPEED = 100,
MALUS_PLAYER_SPEED = 101,
NO_BONUS_MALUS = 110,
NO_MALUS_BONUS = 111,
} bonusType;
union area {
struct {
bool inFire :4;
landType landType :8;
bool presenceBomb :4;
bool presenceBonus :4;
bonusType typeBonus :12;
};
char c;
};
//#pragma pack(0)
typedef union area map[MAP_WIDTH * MAP_HEIGHT];
you first declare a pointer map *d; and allocate memory for it d = malloc(MAP_SIZE); but you ignore the return value...
if (d == NULL) means the malloc didn't work and this (Area->landType == DESTRUCTIBLE_BRICK) would cause UB [because Area = Map[k];] but your code does not protect against that and that could crash
I'm writing a function which just make the sum of two values,
but the C type of these values is specified in parameter,
it can be int, unsigned int, char, unsigned char, float ...
I search a solution to do that without making a lot of C code to process to all different mixed cases.
To be clear the code is :
void addition(unsigned char type_data_1_uc, void *value_data_1_ptr,
unsigned char type_data_2_uc, void *value_data_2_ptr,
unsigned char type_result_uc, void *value_result_ptr)
{
/* types :
0 : TYPE_BIT
1 : TYPE_CHAR
2 : TYPE_UNSIGNED_CHAR
3 : TYPE_INT
4 : TYPE_UNSIGNED_INT
5 : TYPE_SHORT_INT
6 : TYPE_UNSIGNED_SHORT_INT
7 : TYPE_LONG
8 : TYPE_UNSIGNED_LONG
9 : TYPE_FLOAT */
/* INT + INT = INT */
if ((type_data_1_uc == 3)
&& (type_data_2_uc == 3)
&& (type_result_uc == 3))
{
*((int *) value_result_ptr) = *((int *) value_data_1_ptr) + *((int *) value_data_2_ptr);
}
/* FLOAT + FLOAT = FLOAT */
if ((type_data_1_uc == 9)
&& (type_data_2_uc == 9)
&& (type_result_uc == 9))
{
*((float *) value_result_ptr) = *((int *) value_data_1_ptr) + *((int *) value_data_2_ptr);
}
/* UNSIGNED CHAR + INT = INT */
if ((type_data_1_uc == 2)
&& (type_data_2_uc == 3)
&& (type_result_uc == 3))
{
*((int *) value_result_ptr) = *((unsigned char *) value_data_1_ptr) + *((int *) value_data_2_ptr);
}
/* ......... */
}
int main(int argc, char **argv)
{
int data_1;
int data_2;
int result;
data_1 = 26;
data_2 = 32;
addition(3, &data_1, 3, &data_2, 3, &result);
printf("result = %d\n", result);
return 0;
}
I have think to use the union structure but it doesn't solve the problem, because union also need to be statically casted :
/* UNION DATA */
union data_union
{
char tab_c[4];
unsigned char tab_uc[4];
int i;
unsigned int ui;
short int si;
unsigned short int usi;
long l;
unsigned long ul;
float f;
};
void addition(unsigned char type_data_1_uc, union data_union *value_data_1_ptr,
unsigned char type_data_2_uc, union data_union *value_data_2_ptr,
unsigned char type_result_uc, union data_union *value_result_ptr)
{
/* types :
0 : TYPE_BIT
1 : TYPE_CHAR
2 : TYPE_UNSIGNED_CHAR
3 : TYPE_INT
4 : TYPE_UNSIGNED_INT
5 : TYPE_SHORT_INT
6 : TYPE_UNSIGNED_SHORT_INT
7 : TYPE_LONG
8 : TYPE_UNSIGNED_LONG
9 : TYPE_FLOAT */
/* INT + INT = INT */
if ((type_data_1_uc == 3)
&& (type_data_2_uc == 3)
&& (type_result_uc == 3))
{
(*value_result_ptr).i = (*value_data_1_ptr).i + (*value_data_2_ptr).i;
}
/* FLOAT + FLOAT = FLOAT */
if ((type_data_1_uc == 9)
&& (type_data_2_uc == 9)
&& (type_result_uc == 9))
{
(*value_result_ptr).f = (*value_data_1_ptr).f + (*value_data_2_ptr).f;
}
/* UNSIGNED CHAR + INT = INT */
if ((type_data_1_uc == 2)
&& (type_data_2_uc == 3)
&& (type_result_uc == 3))
{
(*value_result_ptr).i = (*value_data_1_ptr).uc + (*value_data_2_ptr).i;
}
}
int main(int argc, char **argv)
{
static union data_union data_1_union;
static union data_union data_2_union;
static union data_union result_union;
memset(&data_1_union, 0, sizeof(union data_union));
memset(&data_2_union, 0, sizeof(union data_union));
data_1_union.i = 26;
data_2_union.i = 32;
addition(3, &data_1_union, 3, &data_2_union, 3, &result_union);
printf("result_union.i = %d\n", result_union.i);
return 0;
}
Any idea to solve this ?
You cannot do that without some "pain", since C is statically typed language. The compiler needs to know the types of variables in order to generate the proper instructions. Most CPU:s have distinct instructions for adding 8-bit integers, 32-bit integers, floats, and so on.
That said, you can certainly improve on the interface: I would use variable arguments to make the prototype:
typedef enum {
TYPE_BIT = 0,
TYPE_CHAR,
TYPE_UNSIGNED_CHAR,
TYPE_INT,
TYPE_UNSIGNED_INT,
TYPE_SHORT_INT,
TYPE_UNSIGNED_SHORT_INT,
TYPE_LONG,
TYPE_UNSIGNED_LONG,
TYPE_FLOAT,
} Type;
void addition(Type type, void *result, ...);
This expects to be called with four arguments, the two latter of which should have the type indicated by the type argument. The result is stored at result, which should be a pointer to the same type as the arguments.
Not sure how to represent single-bit values, probably as unsigned char but it's kind of pointless: single bits is not a type that you can do arithmetic with in C so you're just going to end up doing the add with more bits, then masking some of them off. You also can't have a pointer to a single bit in memory on most machines.
The idiomatic C approach to this problem is to use macros to eliminate code duplication as much as possible. The rest will unfortunately have to be done manually. For example:
enum typecode {
TC_INT = 3,
TC_FLOAT = 9,
/* ... */
};
void addition(enum typecode type_data_1_uc, void *value_data_1_ptr,
enum typecode type_data_2_uc, void *value_data_2_ptr,
enum typecode type_result_uc, void *value_result_ptr)
{
#define DO_ADD(tc1, tc2, tcret, type1, type2, typeret) do { \
if (type_data_1_uc == tc1 && type_data_2_uc == tc2 \
&& type_result_uc == tcret) { \
*(typeret *)value_result_ptr = \
*(type1 *)(value_data_1_ptr) + *(type2 *)(value_data_2_ptr) \
return; \
} while (0)
/* INT + INT = INT */
DO_ADD(TC_INT, TC_INT, TC_INT, int, int, int);
/* FLOAT + FLOAT = FLOAT */
DO_ADD(TC_FLOAT, TC_FLOAT, TC_FLOAT, float, float, float);
/* UCHAR + INT = INT */
DO_ADD(TC_UCHAR, TC_INT, TC_INT, unsigned char, int, int);
/* ... */
#undef DO_ADD
/* none matched: abort() or set an error code, as appropriate */
}
How can I initialize this nested struct in C?
typedef struct _s0 {
int size;
double * elems;
}StructInner ;
typedef struct _s1 {
StructInner a, b, c, d, e;
long f;
char[16] s;
}StructOuter; StructOuter myvar = {/* what ? */ };
To initialize everything to 0 (of the right kind)
StructOuter myvar = {0};
To initialize the members to a specific value
StructOuter myvar = {{0, NULL}, {0, NULL}, {0, NULL},
{0, NULL}, {0, NULL}, 42.0, "foo"};
/* that's {a, b, c, d, e, f, s} */
/* where each of a, b, c, d, e is {size, elems} */
Edit
If you have a C99 compiler, you can also use "designated initializers", as in:
StructOuter myvar = {.c = {1000, NULL}, .f = 42.0, .s = "foo"};
/* c, f, and s initialized to specific values */
/* a, b, d, and e will be initialized to 0 (of the right kind) */
To highlight struct labels in particular:
StructInner a = {
.size: 1,
.elems: { 1.0, 2.0 }, /* optional comma */
};
StructOuter b = {
.a = a, /* struct labels start with a dot */
.b = a,
a, /* they are optional and you can mix-and-match */
a,
.e = { /* nested struct initialization */
.size: 1,
.elems: a.elems
},
.f = 1.0,
.s = "Hello", /* optional comma */
};
double a[] = { 1.0, 2.0 };
double b[] = { 1.0, 2.0, 3.0 };
StructOuter myvar = { { 2, a }, { 3, b }, { 2, a }, { 3, b }, { 2, a }, 1, "a" };
It seems a and b cannot be initialized in-place in plain C
Following also works with GCC, C99. GCC doesnt complain about it. I am not sure if this is standard.
double arr[] = { 1.0, 2.0 }; // should be static or global
StructOuter myvar =
{
.f = 42,
.s = "foo",
.a.size = 2,
.a.elems = &arr,
.b.size = 0, // you can explicitly show that it is zero
// missing members will actually be initialized to zero
};