I'm interested in how you can write data in a particular structure to a pre-allocated memory block.
The idea is to write down the render commands and then read them in a loop. The problem is that allocating and deleting memory every cycle, which is 60 times per second is bad and I want to ask how can I write and read different types of data to the buffer.
example:
struct cmd_viewport {
int x, y, w, h;
};
struct cmd_line {
int x0, y0, x1, y1;
unsigned int width;
};
void* buffer_alloc(void* buffer, size_t offset, size_t size) {
...
}
void* buffer_get_ptr(void* buffer, size_t offset) {
...
}
int main(int argc, char* argv) {
void* cmd_buffer;
struct cmd_viewport* viewport;
struct cmd_line* line;
struct cmd_line* line2;
cmd_buffer = malloc(1024);
if (cmd_buffer == NULL) {
return EXIT_FAILURE;
}
viewport = (struct cmd_viewport*)buffer_alloc(cmd_buffer, 0, sizeof(struct cmd_viewport));
line = (struct cmd_line*)buffer_alloc(cmd_buffer, sizeof(struct cmd_viewport), sizeof(struct cmd_line));
/* it must be a direct write to the cmd_buffer memory location */
viewport->x = 0;
viewport->y = 0;
viewport->w = 800;
viewport->h = 600;
line->x0 = 100;
line->y0 = 100;
line->x1 = 300;
line->y1 = 400;
line->width = 2;
line2 = (struct cmd_line*)buffer_get_ptr(cmd_buffer, sizeof(struct cmd_viewport));
printf("line->width = %d\n", line2->width);
free(cmd_buffer);
return EXIT_SUCCESS;
}
How can you make a void* buffer cast from a specific offset to any data type ?
Related
In this thread I was suggested to use max_align_t in order to get an address properly aligned for any type, I end up creating this implementation of a dynamic array:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
struct vector {
size_t capacity;
size_t typesize;
size_t size;
max_align_t data[];
};
#define VECTOR(v) ((struct vector *)((unsigned char *)v - offsetof(struct vector, data)))
static void *valloc(size_t typesize, size_t size)
{
struct vector *vector;
vector = calloc(1, sizeof(*vector) + typesize * size);
if (vector == NULL) {
return NULL;
}
vector->typesize = typesize;
vector->capacity = size;
vector->size = 0;
return vector->data;
}
static void vfree(void *data, void (*func)(void *))
{
struct vector *vector = VECTOR(data);
if (func != NULL) {
for (size_t iter = 0; iter < vector->size; iter++) {
func((unsigned char *)vector->data + vector->typesize * iter);
}
}
free(vector);
}
static void *vadd(void *data)
{
struct vector *vector = VECTOR(data);
struct vector *new;
size_t capacity;
if (vector->size >= vector->capacity) {
capacity = vector->capacity * 2;
new = realloc(vector, sizeof(*vector) + vector->typesize * capacity);
if (new == NULL) {
return NULL;
}
new->capacity = capacity;
new->size++;
return new->data;
}
vector->size++;
return vector->data;
}
static size_t vsize(void *data)
{
return VECTOR(data)->size;
}
static void vsort(void *data, int (*comp)(const void *, const void *))
{
struct vector *vector = VECTOR(data);
if (vector->size > 1) {
qsort(vector->data, vector->size, vector->typesize, comp);
}
}
static char *vgetline(FILE *file)
{
char *data = valloc(sizeof(char), 32);
size_t i = 0;
int c;
while (((c = fgetc(file)) != '\n') && (c != EOF)) {
data = vadd(data);
data[i++] = (char)c;
}
data = vadd(data);
data[i] = '\0';
return data;
}
struct data {
int key;
char *value;
};
static int comp_data(const void *pa, const void *pb)
{
const struct data *a = pa;
const struct data *b = pb;
return strcmp(a->value, b->value);
}
static void free_data(void *ptr)
{
struct data *data = ptr;
vfree(data->value, NULL);
}
int main(void)
{
struct data *data;
data = valloc(sizeof(struct data), 1);
if (data == NULL) {
perror("valloc");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < 5; i++) {
data = vadd(data);
if (data == NULL) {
perror("vadd");
exit(EXIT_FAILURE);
}
data[i].value = vgetline(stdin);
data[i].key = (int)vsize(data[i].value);
}
vsort(data, comp_data);
for (size_t i = 0; i < vsize(data); i++) {
printf("%d %s\n", data[i].key, data[i].value);
}
vfree(data, free_data);
return 0;
}
But I'm not sure if I can use max_align_t to store a chunk of bytes:
struct vector {
size_t capacity;
size_t typesize;
size_t size;
max_align_t data[]; // Used to store any array,
// for example an array of 127 chars
};
Does it break the one past the last element of an array rule?
Does it break the one past the last element of an array rule?
No.
Using max_align_t to store a chunk of bytes
OP's issue is not special because it uses a flexible array member.
As a special case, the last element of a structure ... have an incomplete array type; this is called a flexible array member. ... However, when a . (or ->) operator has a left operand that is (a pointer to) a structure with a flexible array member and the right operand names that member, it behaves as if that member were replaced with the longest array (with the same element type) ...
It is the same issue as accessing any allocated memory or array of one type as if it was another type.
The conversion from max_align_t * to char * to void * is well defined when alignment is done right.
A pointer to an object type may be converted to a pointer to a different object type. If the resulting pointer is not correctly aligned for the referenced type, the behavior is undefined. C11dr ยง6.3.2.3 7
All reviewed accessing in code do not attempt to access outside the "as if" array.
I have a structure with arrays of structures inside in C, and I need a copy of that in the GPU. For that I am writing a function that makes some cudaMalloc and cudaMemcpys of the variables in the struct from host to device.
A simple version (the real one has various structs and variables/arrays inside) of the struct is:
struct Node {
float* position;
};
struct Graph{
unsigned int nNode;
Node* node;
unsigned int nBoundary;
unsigned int* boundary;
};
My problem is that I must be doing something wrong in the memory allocation and copy of the struct. When I copy the variables withing Graph, I can see that they are properly copied (by accessing it in a kernel as in the example below). For example, I can check that graph.nBoundary=3.
However, I can only see this if I do not allocate and copy the memory of Node *. If I do, I get -858993460 instead of 3. Interestingly, Node * is not wrongly allocated, as I can inspect the value of say graph.node[0].pos[0] and it has the correct value.
This only happens with the graph.nBoundary. All the other variables remain with the correct numerical values, but this one gets "wronged" when running the cudaMemcpy of the Node*.
What am I doing wrong and why does this happen? How do I fix it?
Let me know if you need more information.
MCVE:
#include <algorithm>
#include <cuda_runtime_api.h>
#include <cuda.h>
// A point, part of some elements
struct Node {
float* position;
};
struct Graph{
unsigned int nNode;
Node* node;
unsigned int nBoundary;
unsigned int* boundary;
};
Graph* cudaGraphMalloc(const Graph* inGraph);
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void testKernel(Graph* graph,unsigned int * d_res){
d_res[0] = graph->nBoundary;
};
int main()
{
// Generate some fake data on the CPU
Graph graph;
graph.node = (Node*)malloc(2 * sizeof(Node));
graph.boundary = (unsigned int*)malloc(3 * sizeof(unsigned int));
for (int i = 0; i < 3; i++){
graph.boundary[i] = i + 10;
}
graph.nBoundary = 3;
graph.nNode = 2;
for (int i = 0; i < 2; i++){
// They can have different sizes in the original code
graph.node[i].position = (float*)malloc(3 * sizeof(float));
graph.node[i].position[0] = 45;
graph.node[i].position[1] = 1;
graph.node[i].position[2] = 2;
}
// allocate GPU memory
Graph * d_graph = cudaGraphMalloc(&graph);
// some dummy variables to test on GPU.
unsigned int * d_res, *h_res;
cudaMalloc((void **)&d_res, sizeof(unsigned int));
h_res = (unsigned int*)malloc(sizeof(unsigned int));
//Run kernel
testKernel << <1, 1 >> >(d_graph, d_res);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaMemcpy(h_res, d_res, sizeof(unsigned int), cudaMemcpyDeviceToHost));
printf("%u\n", graph.nBoundary);
printf("%d", h_res[0]);
return 0;
}
Graph* cudaGraphMalloc(const Graph* inGraph){
Graph* outGraph;
gpuErrchk(cudaMalloc((void**)&outGraph, sizeof(Graph)));
//copy constants
gpuErrchk(cudaMemcpy(&outGraph->nNode, &inGraph->nNode, sizeof(unsigned int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(&outGraph->nBoundary, &inGraph->nBoundary, sizeof(unsigned int), cudaMemcpyHostToDevice));
// copy boundary
unsigned int * d_auxboundary, *h_auxboundary;
h_auxboundary = inGraph->boundary;
gpuErrchk(cudaMalloc((void**)&d_auxboundary, inGraph->nBoundary*sizeof(unsigned int)));
gpuErrchk(cudaMemcpy(d_auxboundary, h_auxboundary, inGraph->nBoundary*sizeof(unsigned int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(&outGraph->boundary, d_auxboundary, sizeof(unsigned int *), cudaMemcpyDeviceToDevice));
//Create nodes
Node * auxnode;
gpuErrchk(cudaMalloc((void**)&auxnode, inGraph->nNode*sizeof(Node)));
// Crate auxiliary pointers to grab them from host and pass them to device
float ** d_position, ** h_position;
d_position = static_cast<float **>(malloc(inGraph->nNode*sizeof(float*)));
h_position = static_cast<float **>(malloc(inGraph->nNode*sizeof(float*)));
for (int i = 0; i < inGraph->nNode; i++){
// Positions
h_position[i] = inGraph->node[i].position;
gpuErrchk(cudaMalloc((void**)&d_position[i], 3 * sizeof(float)));
gpuErrchk(cudaMemcpy(d_position[i], h_position[i], 3 * sizeof(float), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(&auxnode[i].position, d_position[i], sizeof(float *), cudaMemcpyDeviceToDevice));
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////// If I comment the following section, nBoundary can be read by the kernel
///////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////
gpuErrchk(cudaMemcpy(&outGraph->node, auxnode, inGraph->nNode*sizeof(Node *), cudaMemcpyDeviceToDevice));
return outGraph;
}
The problem is in the function cudaGraphMalloc where you are trying to allocate device memory to the members of outGraph which has already been allocated on the device. In process of doing so, you are de-referencing a device pointer on host which is illegal.
To allocate device memory to members of struct type variable which exists on the device, we first have to create a temporary host variable of that struct type, then allocate device memory to its members, and then copy it to the struct which exists on the device.
I have answered a similar question here. Please take a look at it.
The fixed code may look like this:
#include <algorithm>
#include <cuda_runtime.h>
#include <cuda.h>
// A point, part of some elements
struct Node {
float* position;
};
struct Graph {
unsigned int nNode;
Node* node;
unsigned int nBoundary;
unsigned int* boundary;
};
Graph* cudaGraphMalloc(const Graph* inGraph);
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void testKernel(Graph* graph, unsigned int * d_res) {
d_res[0] = graph->nBoundary;
};
int main()
{
// Generate some fake data on the CPU
Graph graph;
graph.node = (Node*)malloc(2 * sizeof(Node));
graph.boundary = (unsigned int*)malloc(3 * sizeof(unsigned int));
for (int i = 0; i < 3; i++) {
graph.boundary[i] = i + 10;
}
graph.nBoundary = 3;
graph.nNode = 2;
for (int i = 0; i < 2; i++) {
// They can have different sizes in the original code
graph.node[i].position = (float*)malloc(3 * sizeof(float));
graph.node[i].position[0] = 45;
graph.node[i].position[1] = 1;
graph.node[i].position[2] = 2;
}
// allocate GPU memory
Graph * d_graph = cudaGraphMalloc(&graph);
// some dummy variables to test on GPU.
unsigned int * d_res, *h_res;
cudaMalloc((void **)&d_res, sizeof(unsigned int));
h_res = (unsigned int*)malloc(sizeof(unsigned int));
//Run kernel
testKernel << <1, 1 >> >(d_graph, d_res);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaMemcpy(h_res, d_res, sizeof(unsigned int), cudaMemcpyDeviceToHost));
printf("%u\n", graph.nBoundary);
printf("%u\n", h_res[0]);
return 0;
}
Graph* cudaGraphMalloc(const Graph* inGraph)
{
//Create auxiliary Graph variable on host
Graph temp;
//copy constants
temp.nNode = inGraph->nNode;
temp.nBoundary = inGraph->nBoundary;
// copy boundary
gpuErrchk(cudaMalloc((void**)&(temp.boundary), inGraph->nBoundary * sizeof(unsigned int)));
gpuErrchk(cudaMemcpy(temp.boundary, inGraph->boundary, inGraph->nBoundary * sizeof(unsigned int), cudaMemcpyHostToDevice));
//Create nodes
size_t nodeBytesTotal = temp.nNode * sizeof(Node);
gpuErrchk(cudaMalloc((void**)&(temp.node), nodeBytesTotal));
for (int i = 0; i < temp.nNode; i++)
{
//Create auxiliary node on host
Node auxNodeHost;
//Allocate device memory to position member of auxillary node
size_t nodeBytes = 3 * sizeof(float);
gpuErrchk(cudaMalloc((void**)&(auxNodeHost.position), nodeBytes));
gpuErrchk(cudaMemcpy(auxNodeHost.position, inGraph->node[i].position, nodeBytes, cudaMemcpyHostToDevice));
//Copy auxillary host node to device
Node* dPtr = temp.node + i;
gpuErrchk(cudaMemcpy(dPtr, &auxNodeHost, sizeof(Node), cudaMemcpyHostToDevice));
}
Graph* outGraph;
gpuErrchk(cudaMalloc((void**)&outGraph, sizeof(Graph)));
gpuErrchk(cudaMemcpy(outGraph, &temp, sizeof(Graph), cudaMemcpyHostToDevice));
return outGraph;
}
Be advised that you will have to keep the host copies of internal device pointers (i.e. the auxiliary host variables). This is because you will have to free the device memory later and since you will only have a device copy of Graph in the main code, you won't be able to access its members from the host to call cudaFree on them. In this case the variable Node auxNodeHost (created in each iteration) and Graph temp are those variables.
The above code does not do that and is just for demonstration purpose.
Tested on Windows 10, Visual Studio 2015, CUDA 9.2, NVIDIA Driver 397.44.
I wrote the following struct for a vector:
typedef struct Vector_ Vector;
struct Vector_
{
int dim;
double* vector;
};
And I wrote a function to initialise a new instance of that struct:
Vector* newVector(int length){
int i = 0;
Vector* vec = malloc(sizeof(Vector));
assert(vec != NULL);
assert(length > 0);
vec->dim = length;
vec->vector = malloc((length+1)*sizeof(double));
assert(vec->vector != NULL);
for(i=0;i<=length+1;i++) vec->vector[i] = 0;
return vec;
}
This all works, but then I wrote a function to reallocate memory and change the length of the vector like this:
Vector* setVectorLength(Vector* vec, int length)
{
assert(vec->dim > 0);
int i = 0;
if(length > vec->dim)
{
vec->vector = realloc(vec->vector,(length+1)*sizeof(double));
assert(vec->vector != NULL);
for(i = vec->dim+1;i<=length;i++)
{
vec->vector[i] = 0;
}
}
else
{
vec->vector = realloc(vec->vector,length+1 * sizeof(double));
}
vec->dim = length;
return vec;
}
And I call it like this:
int main()
{
Vector* newvec = newVector(5);
setVectorLength(newvec,8);
}
For some reason, the line in the setVectorLength function where I reallocate the memory of the vector array causes the program to abort. If I comment out that section, everything works. Interestingly, I am able to write entries to the array even without reallocating it to a sufficiently high length, so for example, if I only initialise the vector struct and give it the length 5, then I can write values into the 7th or 8th spot of the vec->vector array, even though I never allocated that much memory to it. Any hints?
Here is my problem: I have to make this program for school and I spent the last hour debugging and googling and haven't found an answer.
I have an array of structures in my main and I want to give that array to my function seteverythingup (by call by reference) because in this function a string I read from a file is split up, and I want to write it into the structure but I always get a SIGSEV error when strcpy with the struct array.
This is my main:
int main(int argc, char *argv[])
{
FILE* datei;
int size = 10;
int used = 0;
char line[1000];
struct raeume *arr = (raeume *) malloc(size * sizeof(raeume*));
if(arr == NULL){
return 0;
}
if(argc < 2){
return 0;
}
datei = fopen(argv[1], "rt");
if(datei == NULL){
return 0;
}
fgets(line,sizeof(line),datei);
while(fgets(line,sizeof(line),datei)){
int l = strlen(line);
if(line[l-1] == '\n'){
line[l-1] = '\0';
}
seteverythingup(&line,arr,size,&used);
}
ausgabeunsortiert(arr,size);
fclose(datei);
return 0;
}
and this is my function:
void seteverythingup(char line[],struct raeume *arr[], int size,int used)
{
char *token,raumnummer[5],klasse[6];
int tische = 0;
const char c[2] = ";";
int i=0;
token = strtok(line, c);
strcpy(raumnummer,token);
while(token != NULL )
{
token = strtok(NULL, c);
if(i==0){
strcpy(klasse,token);
}else if(i==1){
sscanf(token,"%d",&tische);
}
i++;
}
managesize(&arr[size],&size,used);
strcpy(arr[used]->number,raumnummer);
strcpy(arr[used]->klasse,klasse);
arr[used]->tische = tische;
used++;
}
Edit: Since there is more confusion I wrote a short program that works out the part you are having trouble with.
#include <cstdlib>
struct raeume {
int foo;
int bar;
};
void seteverythingup(struct raeume *arr, size_t len) {
for (size_t i = 0; i < len; ++i) {
arr[i].foo = 42;
arr[i].bar = 53;
}
}
int main() {
const size_t size = 10;
struct raeume *arr = (struct raeume*) malloc(size * sizeof(struct raeume));
seteverythingup(arr, size);
return 0;
}
So basically the signature of your functions is somewhat odd. Malloc returns you a pointer to a memory location. So you really dont need a pointer to an array. Just pass the function the pointer you got from malloc and the function will be able to manipulate that region.
Original Answer:
malloc(size * sizeof(raeume*));
This is probably the part of the code that gives you a hard time. sizeof returns the size of a type. You ask sizeof how many bytes a pointer to you raeume struct requires. what you probably wanted to do is ask for the size of the struct itself and allocate size times space for that. So the correct call to malloc would be:
malloc(size * sizeof(struct raeume));
So I needed a simple allocator to allocate (on occasion with zeroing) and later free 4K blocks from a pool of mapped memory. However, after implementing this, while testing I found that after freeing a block or two, if I tried to allocate a block, the program would SEGFAULT.
Curiously enough, when I free multiple blocks in a row, nothing seems to break.
Some important definitions collected from other files:
#define xmattr_constant __attribute__((const))
#define xmattr_malloc __attribute__((malloc))
#define xmattr_pure __attribute__((pure))
#define xm_likely(x) __builtin_expect(!!(x), 1)
#define xm_unlikely(x) __builtin_expect(!!(x), 0)
#define ABLKLEN 4096 // 4K pagesize
typedef struct {
uint8_t magic[16]; // "sfDB5" "vX.XXXXXXX" '\0'
uint8_t *freelist;
uint64_t size;
uint64_t bounds;
} arenaheader;
Allocation code:
void *pd_arena;
void pd_init (size_t len, uint8_t *map) {
int x;
size_t const block = len / 256; // arena physical size
size_t const size = (block / ABLKLEN) * ABLKLEN; // arena useable size
arenaheader *header;
for (x = 0; x < 256; x++) {
header = (void *) &(map[x * block]);
header->freelist = NULL; // no free blocks because all are free
header->size = size; // useable size
header->bounds = ABLKLEN; // current bounds
}
return;
}
xmattr_malloc void *pd_mallocBK (void) {
arenaheader *header = pd_arena;
uint8_t *ptr;
if (xm_unlikely (header->freelist)) { // there's a sitting free block
ptr = header->freelist; // return the free block
void **next = ptr;
header->freelist = *next; // update the free list
} else if (xm_likely (header->bounds < header->size)) { // no free blocks
ptr = pd_arena;
ptr += header->size;
header->size += ABLKLEN;
} else { // no more blocks
ptr = NULL;
}
return ptr;
}
xmattr_malloc void *pd_callocBK (void) {
void *ptr = pd_mallocBK ();
if (xm_likely (ptr)) // allocation was successful
memset (ptr, 0, ABLKLEN);
return ptr;
}
void pd_freeBK (void *ptr) {
arenaheader *header = pd_arena;
if (xm_likely (ptr)) { // non-NULL ptr
void *next = header->freelist; // get current top of stack
void **this = ptr;
*this = next; // move address of current top of stack to ptr
header->freelist = ptr; // push ptr to stack
}
return;
}
Test code:
#define F_LEN (1024 * 1024 * 1024) // 1 GB
#define A_LEN (F_LEN / 256)
int main (int argc, char **argv) {
int x, y;
// setup
int fd;
uint8_t *map;
assert (fd = open ("./pd_single.testout", O_CREAT | O_RDWR | O_EXCL));
if (ftruncate (fd, F_LEN)) {
perror ("ftruncate failed: ");
return 1;
}
assert (map = mmap (NULL, F_LEN, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0));
uint8_t *arena[256];
for (x = 0; x < 256; x++)
arena[x] = map + (x * A_LEN);
// test
volatile int *var;
void *list[512];
int lcnt = 0;
pd_init (F_LEN, map);
// per arena test
for (x = 0; x < 256; x++) {
pd_arena = arena[x];
// allocate and write a few times
for (y = 0; y < 256; y++) {
assert ((list[lcnt] = pd_mallocBK ()));
var = list[lcnt];
*var = (x + 1) * (y + 1);
}
// free some but not all
for (y = 0; y < 64; y++)
pd_freeBK (list[lcnt]);
// now reallocate some and write some
for (y = 0; y < 16; y++) {
assert ((list[lcnt] = pd_mallocBK()));
var = list[lcnt];
*var = 16;
}
}
// cleanup
munmap (map, F_LEN);
close (fd);
return 0;
}
After running the program through gdb, I found that it SEGFAULTs within pd_mallocBK(); specifically, on this line:
header->freelist = *next; // update the free list
However, I can't seem to understand what is wrong with that line and/or how to fix it.
So, two questions, really (in order of importance, most to least):
What is wrong with the selected line and how can I fix it?
Are there any other allocators to which I can simply assign a region of mapped memory to use instead of having to implement this?
The following code works better than the original, but still crashes eventually when starting to work on the last arena.
#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#define xmattr_malloc __attribute__((malloc))
#define xm_likely(x) __builtin_expect(!!(x), 1)
#define xm_unlikely(x) __builtin_expect(!!(x), 0)
enum { ABLKLEN = 4096 };
void pd_freeBK(void *ptr);
xmattr_malloc void *pd_callocBK(void);
xmattr_malloc void *pd_mallocBK(void);
void pd_init(size_t len, uint8_t *map);
typedef struct {
uint8_t magic[16]; // "sfDB5" "vX.XXXXXXX" '\0'
uint8_t *freelist;
uint64_t size;
uint64_t bounds;
} arenaheader;
static void *pd_arena;
static void pd_dump_arena(FILE *fp, const char *tag, const arenaheader *arena)
{
assert(arena != NULL);
fprintf(fp, "Arena: 0x%.8" PRIXPTR " - %s\n", (uintptr_t)arena, tag);
fprintf(fp, "Size: %.8" PRIu64 ", Bounds: %.8" PRIu64 ", Freelist: 0x%.8" PRIXPTR "\n",
arena->size, arena->bounds, (uintptr_t)arena->freelist);
}
void pd_init(size_t len, uint8_t *map)
{
size_t const block = len / 256; // arena physical size
size_t const size = (block / ABLKLEN) * ABLKLEN; // arena useable size
arenaheader *header;
for (int x = 0; x < 256; x++)
{
header = (void *) &(map[x * block]);
header->freelist = NULL; // no free blocks because all are free
header->size = size; // useable size
header->bounds = ABLKLEN; // current bounds
}
for (int x = 0; x < 256; x++)
{
char buffer[32];
sprintf(buffer, "arena %.3d", x);
pd_dump_arena(stdout, buffer, (arenaheader *)&map[x * block]);
}
}
xmattr_malloc void *pd_mallocBK(void)
{
arenaheader *header = pd_arena;
void *ptr;
if (xm_unlikely(header->freelist)) // there's a sitting free block
{
ptr = header->freelist; // return the free block
void **next = ptr;
header->freelist = *next; // update the free list
}
else if (xm_likely(header->bounds < header->size)) // no free blocks
{
ptr = pd_arena;
ptr = (uint8_t *)ptr + header->size;
header->size += ABLKLEN;
}
else // no more blocks
{
ptr = NULL;
}
return ptr;
}
xmattr_malloc void *pd_callocBK(void)
{
void *ptr = pd_mallocBK();
if (xm_likely(ptr)) // allocation was successful
memset(ptr, 0, ABLKLEN);
return ptr;
}
void pd_freeBK(void *ptr)
{
arenaheader *header = pd_arena;
if (xm_likely(ptr)) // non-NULL ptr
{
void *next = header->freelist; // get current top of stack
void **this = ptr;
*this = next; // move address of current top of stack to ptr
header->freelist = ptr; // push ptr to stack
}
}
enum { NUM_ARENAS = 256 };
#define F_LEN (1024 * 1024 * 1024) // 1 GB
#define A_LEN (F_LEN / NUM_ARENAS)
int main(void)
{
const char filename[] = "./pd_single.testout";
// setup
//int fd = open(filename, O_CREAT | O_RDWR | O_EXCL, 0444);
int fd = open(filename, O_CREAT | O_RDWR, 0600);
assert(fd >= 0);
if (ftruncate(fd, F_LEN))
{
unlink(filename);
perror("ftruncate failed: ");
return 1;
}
uint8_t *map = mmap(NULL, F_LEN, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
assert(map != MAP_FAILED);
uint8_t *arena[NUM_ARENAS];
for (int x = 0; x < NUM_ARENAS; x++)
arena[x] = map + (x * A_LEN);
pd_init(F_LEN, map);
// test
void *list[512];
// per arena test
for (int x = 0; x < NUM_ARENAS; x++)
{
int lcnt = 0;
pd_arena = arena[x];
printf("Arena[%.3d] = 0x%.8" PRIXPTR "\n", x, (uintptr_t)pd_arena);
// allocate and write a few times
for (int y = 0; y < 256; y++)
{
assert((list[lcnt] = pd_mallocBK()));
int *var = list[lcnt];
*var = (x + 1) * (y + 1);
printf("[%.3d] data 0x%.8" PRIXPTR " = %d\n", y, (uintptr_t)list[lcnt], *var);
lcnt++;
}
// free some but not all
lcnt = 0;
for (int y = 0; y < 64; y++)
{
printf("[%.3d] free 0x%.8" PRIXPTR " = %d\n", y, (uintptr_t)list[lcnt], *(int *)list[lcnt]);
pd_freeBK(list[lcnt]);
lcnt++;
}
// now reallocate some and write some
lcnt = 0;
for (int y = 0; y < 16; y++)
{
assert((list[lcnt] = pd_mallocBK()));
int *var = list[lcnt];
*var = 16;
printf("[%.3d] data 0x%.8" PRIXPTR " = %d\n", y, (uintptr_t)list[lcnt], *var);
lcnt++;
}
}
// cleanup
munmap(map, F_LEN);
close(fd);
unlink(filename);
return 0;
}
I've not yet tracked down the residual bug. Note the diagnostic printing (verbose) and the different handling of lcnt in main(). You were busy freeing the same memory multiple times, but not detecting that in your pd_freeBK() code. You were also leaking memory because you were not incrementing lcnt in main().