C: Multi producer, multi consumer bounded queue - c

I try (better tried) to implement a circular buffer with the following interface:
ring_buffer *ring_buffer_create(int capacity, int element_size);
void ring_buffer_destroy(ring_buffer *buffer)
const void *ring_buffer_read_acquire(ring_buffer *buffer, ring_buffer_loc *loc);
void ring_buffer_read_finish(ring_buffer *buffer, ring_buffer_loc loc);
void *ring_buffer_write_acquire(ring_buffer *buffer, ring_buffer_loc *loc);
void ring_buffer_write_finish(ring_buffer *buffer, ring_buffer_loc loc);
It should be possible to read / write multiple elements concurrently (and even in parallel). E.g.:
ring_buffer *buffer = ring_buffer_create(10, sizeof(int));
/* Write a single element */
ring_buffer_loc loc0;
int *i0 = ring_buffer_write_acquire(buffer, &loc);
*i0 = 42; // this could be a big data structure and way more expensive
ring_buffer_write_finish(buffer, loc0);
/* Write "concurrently" */
ring_buffer_loc loc1, loc2;
int *i1 = ring_buffer_write_acquire(buffer, &loc);
int *i2 = ring_buffer_write_acquire(buffer, &loc);
*i1 = 1729;
*i2 = 314;
ring_buffer_write_finish(buffer, loc1);
ring_buffer_write_finish(buffer, loc2);
All "acquire"-functions should be blocking until the operation is possible.
So far, so good. I thought this is simple and so I started with a clean implementation which is based on mutex. But soon I could see that this was far too slow for my use-case (100'000 writes and reads per second), so I switched over to spin-locks etc.
My implementation became quite messy and at some point (now), I started to think about why not something "simple" like this with the desired interface already exists? Probably, it is anyway not a great idea to re-implement something like this.
Maybe someone knows an implementation which has such an interface and which is blocking if the operation is not possible? I was looking quite long in the internet, but I could not find a good match for my problem. Maybe my desired interface is just "bad" or "wrong"?
Nevertheless, I add my current code. It basically assigns each "cell" (=value) a state which can be NONE (not set; the cell is basically empty), WRITING (someone acquired the cell to write data), READING (someone acquired the cell to read) and SET (the cell has a value which could be read). Each cell has a spin-lock which is used to update the cell state.
It then works like this:
When someone acquires a read and the current cell has the state "SET", then the value can be read (new state is READING) and the read index is increased. In all other cases a conditional variable is used to wait until an element is available. When an element read is finished, the cell state is changed to NONE and if any writers are waiting, a conditional variable signal is sent.
The same is true if a cell write is acquires. The only difference is that the cell needs the state "NONE" to be used and possible readers are signaled if there are any.
For some reasons the code sometimes locks and so I had to add a "dirty" timeout to my conditional variable. I would already be super happy if this could be solved, because the "timeout" basically makes the code polling (which is relatively ugly) and at the same time many context switches are done. Maybe someone sees the bug? The "new" code also has the disadvantage that it sometimes is really slow which is like a killer for my application. I attached the "old" and the "new" code (the changed lines are marked).
Thank you for helping me:)!
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
typedef int ring_buffer_loc;
enum t_ring_buffer_cell_state
{
NONE = 0,
WRITING = 1,
READING = 2,
SET = 3
};
typedef struct {
char *buffer; // data
atomic_int_fast8_t *states; // state per cell
pthread_spinlock_t *locks; // lock per cell
int capacity;
int element_size;
pthread_spinlock_t read_i_lock;
int read_i;
pthread_spinlock_t write_i_lock;
int write_i;
pthread_spinlock_t waiting_readers_lock;
int waiting_readers;
pthread_spinlock_t waiting_writers_lock;
int waiting_writers;
pthread_mutex_t value_written_lock;
pthread_mutex_t value_read_lock;
pthread_cond_t value_written;
pthread_cond_t value_read;
} ring_buffer;
ring_buffer *ring_buffer_create(int capacity, int element_size)
{
ring_buffer *res = calloc(1, sizeof(ring_buffer));
res->buffer = calloc(capacity, element_size);
res->states = calloc(capacity, sizeof(*res->states));
res->locks = malloc(capacity * sizeof(*res->locks));
for (int i = 0; i < capacity; ++i) {
pthread_spin_init(&res->locks[i], PTHREAD_PROCESS_PRIVATE);
}
pthread_spin_init(&res->write_i_lock, PTHREAD_PROCESS_PRIVATE);
pthread_spin_init(&res->read_i_lock, PTHREAD_PROCESS_PRIVATE);
pthread_spin_init(&res->waiting_readers_lock, PTHREAD_PROCESS_PRIVATE);
pthread_spin_init(&res->waiting_writers_lock, PTHREAD_PROCESS_PRIVATE);
res->capacity = capacity;
res->element_size = element_size;
return res;
}
void ring_buffer_destroy(ring_buffer *buffer)
{
free(buffer->buffer);
free(buffer->states);
free(buffer);
}
static inline void ring_buffer_inc_index(ring_buffer *buffer, int *index)
{
*index = (*index + 1) % buffer->capacity;
}
void timespec_now_plus_ms(struct timespec *result, long ms_to_add)
{
const int one_second_us = 1000 * 1000 * 1000;
timespec_get(result, TIME_UTC);
const long nsec = result->tv_nsec + ms_to_add * 1000 * 1000;
result->tv_sec += nsec / one_second_us;
result->tv_nsec += nsec % one_second_us;
}
const void *ring_buffer_read_acquire(ring_buffer *buffer, ring_buffer_loc *loc)
{
bool is_waiting = false;
start:
pthread_spin_lock(&buffer->read_i_lock);
const int read_i = buffer->read_i;
pthread_spinlock_t *cell_lock = &buffer->locks[read_i];
pthread_spin_lock(cell_lock);
const int state = buffer->states[read_i];
if (state == NONE || state == WRITING || state == READING) {
if (!is_waiting) {
is_waiting = true;
pthread_spin_lock(&buffer->waiting_readers_lock);
++buffer->waiting_readers;
pthread_mutex_lock(&buffer->value_written_lock);
pthread_spin_unlock(&buffer->waiting_readers_lock);
} else {
pthread_mutex_lock(&buffer->value_written_lock);
}
pthread_spin_unlock(cell_lock);
pthread_spin_unlock(&buffer->read_i_lock);
// "new" code:
// struct timespec ts;
// do {
// timespec_now_plus_ms(&ts, 50);
// } while (pthread_cond_timedwait(&buffer->value_written, &buffer->value_written_lock, &ts) == ETIMEDOUT && buffer->states[read_i] == state);
// pthread_mutex_unlock(&buffer->value_written_lock);
// "old" code (which hangs quite often):
pthread_cond_wait(&buffer->value_written, &buffer->value_written_lock);
pthread_mutex_unlock(&buffer->value_written_lock);
goto start;
} else if (state == SET) {
if (is_waiting) {
pthread_spin_lock(&buffer->waiting_readers_lock);
--buffer->waiting_readers;
assert(buffer->waiting_readers >= 0);
pthread_spin_unlock(&buffer->waiting_readers_lock);
}
buffer->states[read_i] = READING;
ring_buffer_inc_index(buffer, &buffer->read_i);
pthread_spin_unlock(&buffer->read_i_lock);
pthread_spin_unlock(cell_lock);
*loc = read_i;
return &buffer->buffer[read_i * buffer->element_size];
} else {
printf("unknown state!\n");
exit(1);
}
}
void ring_buffer_read_finish(ring_buffer *buffer, ring_buffer_loc loc)
{
pthread_spinlock_t *cell_lock = &buffer->locks[loc];
pthread_spin_lock(cell_lock);
buffer->states[loc] = NONE;
pthread_spin_unlock(cell_lock);
pthread_spin_lock(&buffer->waiting_writers_lock);
if (buffer->waiting_writers > 0) {
pthread_cond_signal(&buffer->value_read);
}
pthread_spin_unlock(&buffer->waiting_writers_lock);
}
void *ring_buffer_write_acquire(ring_buffer *buffer, ring_buffer_loc *loc)
{
bool is_waiting = false;
start:
pthread_spin_lock(&buffer->write_i_lock);
const int write_i = buffer->write_i;
pthread_spinlock_t *cell_lock = &buffer->locks[write_i];
pthread_spin_lock(cell_lock);
const int state = buffer->states[write_i];
if (state == SET || state == READING || state == WRITING) {
if (!is_waiting) {
is_waiting = true;
pthread_spin_lock(&buffer->waiting_writers_lock);
++buffer->waiting_writers;
pthread_mutex_lock(&buffer->value_read_lock);
pthread_spin_unlock(&buffer->waiting_writers_lock);
} else {
pthread_mutex_lock(&buffer->value_read_lock);
}
pthread_spin_unlock(cell_lock);
pthread_spin_unlock(&buffer->write_i_lock);
// "new" code:
// struct timespec ts;
// do {
// timespec_now_plus_ms(&ts, 5);
// } while (pthread_cond_timedwait(&buffer->value_read, &buffer->value_read_lock, &ts) == ETIMEDOUT && buffer->states[write_i] == state);
// pthread_mutex_unlock(&buffer->value_read_lock);
// "old" code (which hangs quite often):
pthread_cond_wait(&buffer->value_read, &buffer->value_read_lock);
pthread_mutex_unlock(&buffer->value_read_lock);
goto start;
} else if (state == NONE) {
if (is_waiting) {
pthread_spin_lock(&buffer->waiting_writers_lock);
--buffer->waiting_writers;
assert(buffer->waiting_writers >= 0);
pthread_spin_unlock(&buffer->waiting_writers_lock);
}
buffer->states[write_i] = WRITING;
ring_buffer_inc_index(buffer, &buffer->write_i);
pthread_spin_unlock(&buffer->write_i_lock);
pthread_spin_unlock(cell_lock);
*loc = write_i;
return &buffer->buffer[write_i * buffer->element_size];
} else {
printf("unknown state!\n");
exit(1);
}
}
void ring_buffer_write_finish(ring_buffer *buffer, ring_buffer_loc loc)
{
pthread_spinlock_t *cell_lock = &buffer->locks[loc];
pthread_spin_lock(cell_lock);
buffer->states[loc] = SET;
pthread_spin_unlock(cell_lock);
pthread_spin_lock(&buffer->waiting_readers_lock);
if (buffer->waiting_readers > 0) {
pthread_cond_signal(&buffer->value_written);
}
pthread_spin_unlock(&buffer->waiting_readers_lock);
}
/* just for debugging */
void ring_buffer_dump(const ring_buffer *buffer)
{
printf("RingBuffer\n");
printf(" Capacity: %d\n", buffer->capacity);
printf(" Element size: %d\n", buffer->element_size);
printf(" Read index: %d\n", buffer->read_i);
printf(" Write index: %d\n", buffer->write_i);
printf(" Cells:\n");
for (int i = 0; i < buffer->capacity; ++i) {
printf(" [%d]: STATE = ", i);
switch (buffer->states[i]) {
case NONE:
printf("NONE");
break;
case WRITING:
printf("WRITING");
break;
case READING:
printf("READING");
break;
case SET:
printf("SET");
break;
}
printf("\n");
}
printf("\n");
}
/*
* Test run
*/
struct write_read_n_conf {
ring_buffer *buffer;
int n;
};
static void *producer_thread(void *arg)
{
struct write_read_n_conf conf = *(struct write_read_n_conf *)arg;
for (int i = 0; i < conf.n; ++i) {
ring_buffer_loc loc;
int *value = ring_buffer_write_acquire(conf.buffer, &loc);
*value = i;
ring_buffer_write_finish(conf.buffer, loc);
if (i % 1000 == 0) {
printf("%d / %d\n", i, conf.n);
}
}
return NULL;
}
static void *consumer_thread(void *arg)
{
struct write_read_n_conf conf = *(struct write_read_n_conf *)arg;
int tmp;
bool ok = true;
for (int i = 0; i < conf.n; ++i) {
ring_buffer_loc loc;
const int *value = ring_buffer_read_acquire(conf.buffer, &loc);
tmp = *value;
ring_buffer_read_finish(conf.buffer, loc);
ok = ok && (tmp == i);
}
printf("ok = %d\n", ok);
return (void *)ok;
}
void write_read_n_parallel(int n)
{
ring_buffer *buffer = ring_buffer_create(50, sizeof(int));
struct write_read_n_conf conf = {
.buffer = buffer,
.n = n
};
pthread_t consumer;
pthread_t producer;
pthread_create(&consumer, NULL, consumer_thread, &conf);
pthread_create(&producer, NULL, producer_thread, &conf);
pthread_join(producer, NULL);
void *res;
pthread_join(consumer, &res); // hacky way to pass a bool: res == NULL means false, and otherwise true
assert(res != NULL);
}
int main() {
write_read_n_parallel(10000000);
}

Related

typedef struct in typedef struct variable assignment not working in c

I'm trying to assing a variable in a typedef struct which is stored in another typedef struct. Once i store this pointer inside a pointer array, the variable id of the procHeaader is lost.
#include <stdlib.h>
#include <Arduino.h>
#include "Proc.h"
#include "../../Fat/src/Fat.h"
/*
creates a process table with a size of MAX_PROC_COUNT
*/
ProcTable * createProcTable() {
ProcTable * procTable = (ProcTable *)malloc(sizeof(ProcTable));
procTable->procCount = 0;
return procTable;
}
/*
dumps the process table
*/
void dumpProcessTable(ProcTable * table) {
Serial.println("Processes");
if(table->procCount < 1) {
Serial.println("---------------");
Serial.println("no procs running");
return;
}
for(int i = 0; i < table->procCount; i++) {
Serial.println("---------------");
Serial.println("pid | quantum");
Serial.print(table->procs[i]->header->id);
Serial.write(" ");
Serial.print(table->procs[i]->quantum);
Serial.write("\n");
}
}
/*
creates a process in the paused state
*/
Process * createProcess(ProcTable * table, Fat * fat, char * fileName) {
Process * proc = (Process*)malloc(sizeof(Process));
proc->header = (ProcHeader*)malloc(sizeof(ProcHeader));
proc->mmu = (MMU *)malloc(sizeof(MMU));
proc->header->id = table->procCount;
proc->header->state = RUNNING;
proc->quantum = MAX_QUANTUM; //TODO: change this based on how big the binary that gets loaded into it is
loadProcExecutable(proc, fat, fileName);
table->procs[table->procCount] = proc;
table->procCount++;
return proc;
}
Process * findProc(ProcTable * table, int pid) {
for(int i = 0; i <= table->procCount; i++) {
if(table->procs[i]->header->id == pid) {
return table->procs[i];
}
}
Serial.println("Tried to lookup non existent process. terminating operation.");
}
/*
cuts a process specified with pid from the proc table and decrements the procCounter
*/
void deleteProc(ProcTable * table, int pid) {
Process * procs[MAX_PROC_COUNT] = {};
for(int i = 0; i < table->procCount; i++) {
if(table->procs[i]->header->id != pid) {
procs[i] = table->procs[i];
}
}
memcpy(table->procs, procs, MAX_PROC_COUNT);
table->procCount--;
}
/*
sets the state of a process
states:
0: paused
1: running
2: terminated
this should also be used to outright kill processes
*/
void setProcState(ProcTable * table, int pid, int state) {
Process * proc = findProc(table, pid);
switch(state) {
case PAUSED:
proc->header->state = PAUSED;
break;
case RUNNING:
proc->header->state = RUNNING;
break;
case TERMINATED:
deleteProc(table, proc->header->id);
break;
}
}
/*
Loads a program into a given process space
*/
void loadProcExecutable(Process * proc, Fat * fat, char * fileName) {
fileInfo * filePtr = (fileInfo *)malloc(sizeof(fileInfo));
int fileIndex = findFileByName(fileName, fat->fileCount);
char * fileContents;
if(fileIndex == -1) {
Serial.print(fileName);
Serial.print(": no such file\n");
return;
}
getFATEntry(fileIndex, filePtr);
getFileData(*filePtr, fileContents);
writeMemoryChunk(proc->mmu, fileContents, 0, sizeof(fileContents)/sizeof(char));
}
void startNextProc(ProcTable * table) {
table->procs[table->procCount-1]->header->state == RUNNING;
}
/*
updates all processes timers. If any process has run out of time, it will be killed
*/
void updateProcessQuantums(ProcTable * table, int cycleTime) {
if(table->procCount == 0) {
return;
}
for(int i = 0; i <= table->procCount; i++) {
if(table->procs[i]->quantum - cycleTime < 0) {
if(!table->procs[i]->interpreter->finished) {
table->procs[i]->header->state = PAUSED;
}
//dump a message
Serial.print("killing pid: ");
Serial.print(table->procs[i]->header->id);
Serial.println(" quantum expired");
//kill the process
setProcState(table, table->procs[i]->header->id, TERMINATED);
//see what proc is next in line
startNextProc(table);
return;
}
if(table->procs[i]->header->state == PAUSED) {
continue;
}
if(table->procs[i]->header->state == RUNNING) {
table->procs[i]->quantum -= 1;
// InterpretOneCycle(table->procs[i]->interpreter, &table->procs[i]->header->pc, table->procs[i]->mmu->memUsed);
// if(table->procs[i]->interpreter->finished) {
// setProcState(table, table->procs[i]->header->id, TERMINATED);
// startNextProc(table);
// }
}
}
return;
}
header file:
#define PROC_MEM_SIZE 0x200
#define MAX_PROC_COUNT 0x9
#define MAX_QUANTUM 1000000 //ticks
#define PAUSED 0
#define RUNNING 1
#define TERMINATED 2
typedef struct {
char * stack;
int id;
int state;
int sp;
int loopAddr;
int pc;
} ProcHeader;
typedef struct {
ProcHeader * header; //contains global information about the process
unsigned int memOffset; //this is for the interpreter to know where to execute its opcodes
long int quantum; //time slot for the process
Interpreter * interpreter; //Interpreter that will execute opcodes
MMU * mmu; //Manager for process memory
} Process;
typedef struct {
Process * procs[MAX_PROC_COUNT];
int procCount;
} ProcTable;
ProcTable * createProcTable();
Process * createProcess(ProcTable * table, Fat * fat, char * filename);
void setProcState(ProcTable * table, int pid, int state);
void dumpProcessTable(ProcTable * table);
void updateProcessQuantums(ProcTable * table, int cycleTime);
void loadProcExecutable(Process * proc, Fat * fat, char * fileName);
caller method:
Kernel * initKernel() {
Kernel * kernel = (Kernel*)malloc(sizeof(Kernel));
kernel->procTable = createProcTable();
kernel->fat = createFAT();
return kernel;
}
caller header:
typedef struct {
ProcTable * procTable;
Fat * fat;
int debugMode;
int cycleDuration;
} Kernel;
Kernel * initKernel();
Process * createProcess(ProcTable * table);
Once i view records of the procs array the id of the process it return a - something number. Why is this?
For debugging is use the following function:
for(int i = 0; i < table->procCount; i++) {
Serial.println("---------------");
Serial.println("pid | quantum");
Serial.print(table->procs[i]->header->id); //returns 1256?
Serial.write(" ");
Serial.print(table->procs[i]->quantum);
Serial.write("\n");
}

Send message through Ring (Circular) Buffer between Threads (in C)

I need to send a message from Main thread to my Created Thread using WinAPI and Ring Buffer.
I defined structures and wrote functions for my Ring buffer.
Ring Buffer - it contains head, tail, size and pointer to the structure Descriptor which has length of Data and data itself. As I need to send 2 parameters to CreateThread function, I created the third structure ThreadParams to keep 2 parameters.
I want to leave this structures how they are now, not changeable.
typedef struct _Descriptor
{
uint32_t dataLen;
void * data;
} Descriptor;
typedef struct _ringBuffer
{
Descriptor *bufferData;
int head;
int tail;
int size;
} ringBuffer;
typedef struct _ThreadParams
{
void * ptr1;
void * ptr2;
} ThreadParams;
There are my realisations of Ring Buffer functions:
void bufferFree(ringBuffer *buffer)
{
free(buffer->bufferData);
}
void ringInitialization(ringBuffer *buffer, int size)
{
buffer->size = size;
buffer->head = 0;
buffer->tail = 0;
buffer->bufferData = (Descriptor*)malloc(sizeof(Descriptor) * size);
}
int pushBack(ringBuffer *buffer, void * data) // fill buffer
{
buffer->bufferData[buffer->tail++] = *(Descriptor*)data;
if (buffer->tail == buffer->size)
{
buffer->tail = 0;
}
return 0;
}
int popFront(ringBuffer *buffer)
{
if (buffer->head != buffer->tail)
{
buffer->head++;
if (buffer->head == buffer->size)
{
buffer->head = 0;
}
}
return 0;
}
My main: I checked that I can send a few bytes (the memory is shared between threads), now I need to send a big message (> BUFF_SIZE) though Ring Buffer what I'm trying to do in while() cycle. Here is the question: how should I do it? My thing doesn't work because I catch an exception in printf() function (memory acces violation).
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <windows.h>
#include <strsafe.h>
#include <stdint.h>
#define RING_SIZE 256
#define BUFFER_SIZE 1024
DWORD WINAPI HandleSendThread(LPVOID params);
uint8_t * getPointer(uint8_t *buffer, uint32_t index)
{
uint8_t * ptr = ((uint8_t*)buffer) + index * BUFFER_SIZE;
return ptr;
}
int main(int argc, char * argv[])
{
//Descriptor * ringData = (Descriptor *)malloc(sizeof(Descriptor) * RING_SIZE);
ringBuffer ring;
ringInitialization(&ring, RING_SIZE);
void * packetBuffer = malloc(BUFFER_SIZE * RING_SIZE);
uint8_t * currentBuffer = getPointer(packetBuffer, 0);
uint8_t * str = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. Mr.Dursley was the director of a firm called Grunnings, which made drills.He was a big, beefy man with hardly any neck, although he did have a very large mustache.Mrs.Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors.The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.";
strcpy(currentBuffer, str);
ring.bufferData[0].data = currentBuffer;
ring.bufferData[0].dataLen = strlen(str);
int currentSize = 0;
int ringSize = RING_SIZE;
while(ring.bufferData[0].data != '\0')
{
for (int i = currentSize; i < ringSize; i + RING_SIZE)
{
pushBack(&ring, currentBuffer);
printf("h = %s, tail = %s, dataBuffer = %s\n", (char*)ring.head, (char*)ring.tail, (char*)ring.bufferData[i].data);
}
currentSize = ringSize;
ringSize = 2 * ringSize;
popFront(&ring);
}
ThreadParams params = { &ring, packetBuffer };
HANDLE MessageThread = 0;
MessageThread = CreateThread(NULL, 0, HandleSendThread, &params, 0, NULL);
if (MessageThread == NULL)
{
ExitProcess(MessageThread);
}
WaitForSingleObject(MessageThread, INFINITE);
CloseHandle(MessageThread);
system("pause");
return 0;
}
And my CreateThread function:
DWORD WINAPI HandleSendThread(LPVOID params)
{
ringBuffer * ring = ((ThreadParams*)params)->ptr1;
void * buffer = ((ThreadParams*)params)->ptr2;
//ring->bufferData[0].dataLen = sizeof(buffer) + sizeof(ring->bufferData[0])*1024;
printf("Shared memory check: ringBuffer data = \"%s\", \nlength = %d\n", (char*)ring->bufferData[0].data, ring->bufferData[0].dataLen);
return 0;
}
Your most immediate problem is the inconsistency between the code in pushBack(), which expects data to point to a Descriptor, and the code in your main function, which passes in a pointer to a string instead.
If you had declared pushBack() properly, i.e.,
void pushBack(ringBuffer *buffer, Descriptor * data)
{
buffer->bufferData[buffer->tail++] = *data;
if (buffer->tail == buffer->size)
{
buffer->tail = 0;
}
}
Then the compiler would have been able to warn you about the discrepancy.
You also have an infinite loop here:
for (int i = currentSize; i < ringSize; i + RING_SIZE)
You probably meant
for (int i = currentSize; i < ringSize; i += RING_SIZE)
... although it still doesn't look to me like it will do anything sensible. Nor do I understand the purpose of the outer loop, which compares a pointer to a character.
Found a solution
int main(int argc, char * argv[])
{
ringBuffer ring;
ringInitialization(&ring, RING_SIZE);
void * packetBuffer = malloc(BUFFER_SIZE * RING_SIZE);
Descriptor temp = { 0 };
uint8_t * currentBuffer = getPointer(packetBuffer, 0);
uint8_t * str = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. Mr.Dursley was the director of a firm called Grunnings, which made drills.He was a big, beefy man with hardly any neck, although he did have a very large mustache.Mrs.Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors.The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.";
strcpy(currentBuffer, str);
temp.dataLen = strlen(str);
temp.data = currentBuffer;
pushBack(&ring, &temp);
ThreadParams params = { &ring, packetBuffer };
HANDLE MessageThread = 0;
MessageThread = CreateThread(NULL, 0, HandleSendThread, &params, 0, NULL);
if (MessageThread == NULL)
{
ExitProcess(MessageThread);
}
WaitForSingleObject(MessageThread, INFINITE);
CloseHandle(MessageThread);
system("pause");
return 0;
}
DWORD WINAPI HandleSendThread(LPVOID params)
{
ringBuffer * ring = ((ThreadParams*)params)->ptr1;
void * buffer = ((ThreadParams*)params)->ptr2;
Descriptor * temp = &ring->bufferData[ring->head];
for (int i = 0; i < temp->dataLen; i++)
{
printf("%c", ((char*)temp->data)[i]);
}
printf("\n");
return 0;
}

Where is my message queue producing a segmentation fault?

The message queue simply stops working when dealing with many many threads. It only seems to work okay with 10 threads, for exmaple. GDB tells me
Program received signal SIGSEGV, Segmentation fault.
__GI_____strtol_l_internal (nptr=0x0, endptr=endptr#entry=0x0, base=base#entry=10, group=group#entry=0, loc=0x7ffff78b0060 <_nl_global_locale>)
at ../stdlib/strtol_l.c:298
298 ../stdlib/strtol_l.c: No such file or directory.
But I have no idea what this means. The same code on Windows works fine but on linux it doesn't, which confuses me more.
You can see below how this queue works. It is a singly linked list with locking while receiving messages. Please help me find where I messed up.
typedef struct Message {
unsigned type;
unsigned code;
void *data;
} Message;
typedef struct MessageQueueElement {
Message message;
struct MessageQueueElement *next;
} MessageQueueElement;
typedef struct MessageQueue {
MessageQueueElement *first;
MessageQueueElement *last;
} MessageQueue;
MessageQueue mq;
pthread_mutex_t emptyLock, sendLock;
pthread_cond_t emptyCond;
void init() {
mq.first = malloc(sizeof(MessageQueueElement));
mq.last = mq.first;
pthread_mutex_init(&emptyLock, NULL);
pthread_mutex_init(&sendLock, NULL);
pthread_cond_init(&emptyCond, NULL);
}
void clean() {
free(mq.first);
pthread_mutex_destroy(&emptyLock);
pthread_mutex_destroy(&sendLock);
pthread_cond_destroy(&emptyCond);
}
void sendMessage(MessageQueue *this, Message *message) {
pthread_mutex_lock(&sendLock);
if (this->first == this->last) {
pthread_mutex_lock(&emptyLock);
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
pthread_cond_signal(&emptyCond);
pthread_mutex_unlock(&emptyLock);
} else {
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
}
pthread_mutex_unlock(&sendLock);
}
int waitMessage(MessageQueue *this, int (*readMessage)(unsigned, unsigned, void *)) {
pthread_mutex_lock(&emptyLock);
if (this->first == this->last) {
pthread_cond_wait(&emptyCond, &emptyLock);
}
pthread_mutex_unlock(&emptyLock);
int n = readMessage(this->first->message.type, this->first->message.code, this->first->message.data);
MessageQueueElement *temp = this->first;
this->first = this->first->next;
free(temp);
return n;
}
some test code:
#define EXIT_MESSAGE 0
#define THREAD_MESSAGE 1
#define JUST_A_MESSAGE 2
#define EXIT 0
#define CONTINUE 1
int readMessage(unsigned type, unsigned code, void *data) {
if (type == THREAD_MESSAGE) {
printf("message from thread %d: %s\n", code, (char *)data);
free(data);
} else if (type == JUST_A_MESSAGE) {
puts((char *)data);
free(data);
} else if (type == EXIT_MESSAGE) {
puts("ending the program");
return EXIT;
}
return CONTINUE;
}
int nThreads;
int counter = 0;
void *worker(void *p) {
double pi = 0.0;
for (int i = 0; i < 1000000; i += 1) {
pi += (4.0 / (8.0 * i + 1.0) - 2.0 / (8.0 * i + 4.0) - 1.0 / (8.0 * i + 5.0) - 1.0 / (8.0 * i + 6.0)) / pow(16.0, i);
}
char *s = malloc(100);
sprintf(s, "pi equals %.8f", pi);
sendMessage(&mq, &(Message){.type = THREAD_MESSAGE, .code = (int)(intptr_t)p, .data = s});
counter += 1;
char *s2 = malloc(100);
sprintf(s2, "received %d message%s", counter, counter == 1 ? "" : "s");
sendMessage(&mq, &(Message){.type = JUST_A_MESSAGE, .data = s2});
if (counter == nThreads) {
sendMessage(&mq, &(Message){.type = EXIT_MESSAGE});
}
}
int main(int argc, char **argv) {
clock_t timer = clock();
init();
nThreads = atoi(argv[1]);
pthread_t threads[nThreads];
for (int i = 0; i < nThreads; i += 1) {
pthread_create(&threads[i], NULL, worker, (void *)(intptr_t)i);
}
while (waitMessage(&mq, readMessage));
for (int i = 0; i < nThreads; i += 1) {
pthread_join(threads[i], NULL);
}
clean();
timer = clock() - timer;
printf("%.2f\n", (double)timer / CLOCKS_PER_SEC);
return 0;
}
--- EDIT ---
Okay I managed to fix the problem by changing the program a bit using semaphores. The waitMessage function doesn't have to be locked since it is accessed by only one thread and the values that it modifies does not clash with sendMessage.
MessageQueue mq;
pthread_mutex_t mutex;
sem_t sem;
void init() {
mq.first = malloc(sizeof(MessageQueueElement));
mq.last = mq.first;
pthread_mutex_init(&mutex, NULL);
sem_init(&sem, 0, 0);
}
void clean() {
free(mq.first);
pthread_mutex_destroy(&mutex);
sem_destroy(&sem);
}
void sendMessage(MessageQueue *this, Message *message) {
pthread_mutex_lock(&mutex);
this->last->message = *message;
this->last = this->last->next = malloc(sizeof(MessageQueueElement));
pthread_mutex_unlock(&mutex);
sem_post(&sem);
}
int waitMessage(MessageQueue *this, int (*readMessage)(unsigned, unsigned, void *)) {
sem_wait(&sem);
int n = readMessage(this->first->message.type, this->first->message.code, this->first->message.data);
MessageQueueElement *temp = this->first;
this->first = this->first->next;
free(temp);
return n;
}
Your waitMessage function is modifying this->first outside of any locking. This is a bad thing.
It's often not worth recreating things that are already provided for you by an OS. You're effectively trying to set up a pipe of Message structures. You could simply use an anonymous pipe instead (see here for Linux, or here for Windows) and write/read Message structures to/from it. There's also POSIX message queues which are probably a bit more efficient.
In your case with multiple worker threads you'd have to have a supplementary mutex semaphore to control which worker is trying to read from the pipe or message queue.

int queue with compare and swap has race condition

I have written a synchronised queue for holding integers and am faced with a weird race condition which I cannot seem to be able to understand.
Please do NOT post solutions, I know how to fix the code and make it work, I want to know what the race condition is and why it is not working as intended. Please help me understand what is going wrong and why.
First the important part of the code:
This assumes that the application will never put in more then the buffer can hold, thus no check for the current buffer size
static inline void int_queue_put_sync(struct int_queue_s * const __restrict int_queue, const long int value ) {
if (value) { // 0 values are not allowed to be put in
size_t write_offset; // holds a current copy of the array index where to put the element
for (;;) {
// retrieve up to date write_offset copy and apply power-of-two modulus
write_offset = int_queue->write_offset & int_queue->modulus;
// if that cell currently holds 0 (thus is empty)
if (!int_queue->int_container[write_offset])
// Appetmt to compare and swap the new value in
if (__sync_bool_compare_and_swap(&(int_queue->int_container[write_offset]), (long int)0, value))
// if successful then this thread was the first do do this, terminate the loop, else try again
break;
}
// increment write offset signaling other threads where the next free cell is
int_queue->write_offset++;
// doing a synchronised increment here does not fix the race condition
}
}
This seems to have a rare race condition which seems to not increment the write_offset.
Tested on OS X gcc 4.2, Intel Core i5 quadcore and Linux Intel C Compiler 12 on RedHat 2.6.32 Intel(R) Xeon(R). Both produce race conditions.
Full source with test cases:
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <stdint.h>
// #include "int_queue.h"
#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#ifndef INT_QUEUE_H
#define INT_QUEUE_H
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
struct int_queue_s {
size_t size;
size_t modulus;
volatile size_t read_offset;
volatile size_t write_offset;
volatile long int int_container[0];
};
static inline void int_queue_put(struct int_queue_s * const __restrict int_queue, const long int value ) {
if (value) {
int_queue->int_container[int_queue->write_offset & int_queue->modulus] = value;
int_queue->write_offset++;
}
}
static inline void int_queue_put_sync(struct int_queue_s * const __restrict int_queue, const long int value ) {
if (value) {
size_t write_offset;
for (;;) {
write_offset = int_queue->write_offset & int_queue->modulus;
if (!int_queue->int_container[write_offset])
if (__sync_bool_compare_and_swap(&(int_queue->int_container[write_offset]), (long int)0, value))
break;
}
int_queue->write_offset++;
}
}
static inline long int int_queue_get(struct int_queue_s * const __restrict int_queue) {
size_t read_offset = int_queue->read_offset & int_queue->modulus;
if (int_queue->write_offset != int_queue->read_offset) {
const long int value = int_queue->int_container[read_offset];
int_queue->int_container[read_offset] = 0;
int_queue->read_offset++;
return value;
} else
return 0;
}
static inline long int int_queue_get_sync(struct int_queue_s * const __restrict int_queue) {
size_t read_offset;
long int volatile value;
for (;;) {
read_offset = int_queue->read_offset;
if (int_queue->write_offset == read_offset)
return 0;
read_offset &= int_queue->modulus;
value = int_queue->int_container[read_offset];
if (value)
if (__sync_bool_compare_and_swap(&(int_queue->int_container[read_offset]), (long int)value, (long int)0))
break;
}
int_queue->read_offset++;
return value;
}
static inline struct int_queue_s * int_queue_create(size_t num_values) {
struct int_queue_s * int_queue;
size_t modulus;
size_t temp = num_values + 1;
do {
modulus = temp;
temp--;
temp &= modulus;
} while (temp);
modulus <<= 1;
size_t int_queue_mem = sizeof(*int_queue) + ( sizeof(int_queue->int_container[0]) * modulus);
if (int_queue_mem % sysconf(_SC_PAGE_SIZE)) int_queue_mem += sysconf(_SC_PAGE_SIZE) - (int_queue_mem % sysconf(_SC_PAGE_SIZE));
int_queue = mmap(NULL, int_queue_mem, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE , -1, 0);
if (int_queue == MAP_FAILED)
return NULL;
int_queue->modulus = modulus-1;
int_queue->read_offset = 0;
int_queue->write_offset = 0;
int_queue->size = num_values;
memset((void*)int_queue->int_container, 0, sizeof(int_queue->int_container[0]) * modulus);
size_t i;
for (i = 0; i < num_values; ) {
int_queue_put(int_queue, ++i );
}
return int_queue;
}
#endif
void * test_int_queue_thread(struct int_queue_s * int_queue) {
long int value;
size_t i;
for (i = 0; i < 10000000; i++) {
int waited = -1;
do {
value = int_queue_get_sync(int_queue);
waited++;
} while (!value);
if (waited > 0) {
printf("waited %d cycles to get a new value\n", waited);
// continue;
}
// else {
printf("thread %p got value %ld, i = %zu\n", (void *)pthread_self(), value, i);
// }
int timesleep = rand();
timesleep &= 0xFFF;
usleep(timesleep);
int_queue_put_sync(int_queue, value);
printf("thread %p put value %ld back, i = %zu\n", (void *)pthread_self(), value, i);
}
return NULL;
}
int main(int argc, char ** argv) {
struct int_queue_s * int_queue = int_queue_create(2);
if (!int_queue) {
fprintf(stderr, "error initializing int_queue\n");
return -1;
}
srand(0);
long int value[100];
size_t i;
for (i = 0; i < 100; i++) {
value[0] = int_queue_get(int_queue);
if (!value[0]) {
printf("error getting value\n");
}
else {
printf("got value %ld\n", value[0]);
}
int_queue_put(int_queue, value[0]);
printf("put value %ld back successfully\n", value[0]);
}
pthread_t threads[100];
for (i = 0; i < 4; i++) {
pthread_create(threads + i, NULL, (void * (*)(void *))test_int_queue_thread, int_queue);
}
for (i = 0; i < 4; i++) {
pthread_join(threads[i], NULL);
}
return 0;
}
Interesting question. Here is a wild guess. :-)
It seems you need some synchronization between your read_offset and write_offset.
For example, here is a race that may be related or not. Between your compare-and-swap and the write_offset increment you may have a reader come in and set the value back to zero.
Writer-1: get write_offset=0
Writer-2: get write_offset=0
Writer-1: compare-and-swap at offset=0
Writer-1: Set write_offset=1
Reader-1: compare-and-swap at offset=0 (sets it back to zero)
Writer-2: compare-and-swap at offset=0 again even though write_offset=1
Writer-2: Set write_offset=2
I believe that int_queue->write_offset++; is the problem: if two threads execute this instruction simultaneously, they will both load the same value from memory, increment it, and store the same result back (such that the variable only increases by one).
my opinion is
int_queue->write_offset++;
and
write_offset = int_queue->write_offset & int_queue->modulus;
are not thread safe

Producer/consumer with bounded buffer

Could someone check my code and tell me if I am on the right track.. It seems like I am a bit lost.. if you see my errors, please let me know them..
What I am trying to do is to solve bounded buffer using my own semaphores as well as GCD.
Thanks in advance..
sema.c
void procure( Semaphore *semaphore ) {
pthread_mutex_lock(semaphore->mutex1);
while(semaphore->value <= 0)
pthread_cond_wait(&semaphore->condition, semaphore->mutex1);
semaphore->value--;
pthread_mutex_unlock(semaphore->mutex1);
}
void vacate( Semaphore *semaphore ) {
pthread_mutex_lock(semaphore->mutex1);
semaphore->value++;
pthread_cond_signal(&semaphore->condition);
pthread_mutex_unlock(semaphore->mutex1);
}
void init ( Semaphore *semaphore ){
semaphore->value = 1;
pthread_mutex_t myMutex;
semaphore->mutex1 = &myMutex;
pthread_mutex_init( semaphore->mutex1, NULL);
}
void destroy ( Semaphore *semaphore ) {
pthread_mutex_destroy(semaphore->mutex1);
}
and main.c
struct variables {
Semaphore *sem;
};
struct variables vars;
void constructer (int *buffer, int *in, int *out) {
init(vars.sem);
}
void deconstructer () {
destroy(vars.sem);
}
int rand_num_gen() {
uint_fast16_t buffer;
int file;
int *rand;
file = open("/dev/random", O_RDONLY);
while( 1 ) {
read(file, &buffer, sizeof(buffer));
printf("16 bit number: %hu\n", buffer );
*rand = (int) buffer;
close(file);
break;
}
return *rand;
}
void put_buffer( int* buffer, int* in, int* out ) {
buffer[*in] = rand_num_gen(); // produce
procure(vars.sem); // wait here
*in = (*in + 1) % BUF_SIZE;
vacate(vars.sem);
}
void get_buffer( int* buffer, int* in, int* out ) {
int value;
procure(vars.sem);
value = buffer[*out];
vacate(vars.sem);
*out = (*out + 1) % BUF_SIZE;
}
int main (void) {
int *in, *out, *buffer;
constructer(buffer, in, out);
dispatch_queue_t producer, consumer;
producer = dispatch_queue_create("put_buffer", NULL);
consumer = dispatch_queue_create("get_buffer", NULL);
dispatch_async(producer,
^{
int i;
do
{
put_buffer( buffer, in, out );
dispatch_async(consumer,
^{
get_buffer( buffer, in, out );
if (i == RUN_LENGTH) exit(EXIT_SUCCESS);
});
}
while (i < RUN_LENGTH);
});
dispatch_main();
deconstructer();
exit (0);
}
Your code has a bug. In the init function you assign the address of a local variable to semaphore->mutex1, and when the function returns this address will be invalid. Later you still use this address, so this leads to undefined behavior.
You must either allocate the memory for the mutex directly in the semaphore (without a pointer) or allocate the memory via malloc.
Update:
Your program has so many bugs that you should definitely pick an easier topic to learn the basic concepts about memory management, how to allocate, use and reference a buffer, do proper error handling, etc. Here is a slightly edited version of your code. It still won't work, but probably has some ideas that you should follow.
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
void procure(Semaphore *semaphore) {
pthread_mutex_lock(semaphore->mutex1);
while (semaphore->value <= 0)
pthread_cond_wait(&semaphore->condition, semaphore->mutex1);
semaphore->value--;
pthread_mutex_unlock(semaphore->mutex1);
}
void vacate(Semaphore *semaphore) {
pthread_mutex_lock(semaphore->mutex1);
semaphore->value++;
pthread_cond_signal(&semaphore->condition);
pthread_mutex_unlock(semaphore->mutex1);
}
struct variables {
mutex_t sem_mutex;
Semaphore sem;
};
struct variables vars;
void constructor(int *buffer, int *in, int *out) {
vars.sem.value = 1;
vars.sem.mutex1 = &vars.sem_mutex;
pthread_mutex_init(vars.sem.mutex1, NULL);
}
void deconstructor() {
pthread_mutex_destroy(&semaphore->mutex1);
}
int rand_num_gen() {
const char *randomfile = "/dev/random";
unsigned char buffer[2]; // Changed: always treat files as byte sequences.
FILE *f = fopen(randomfile, "rb");
// Changed: using stdio instead of raw POSIX file access,
// since the API is much simpler; you don't have to care
// about interrupting signals or partial reads.
if (f == NULL) { // Added: error handling
fprintf(stderr, "E: cannot open %s\n", randomfile);
exit(EXIT_FAILURE);
}
if (fread(buffer, 1, 2, f) != 2) { // Added: error handling
fprintf(stderr, "E: cannot read from %s\n", randomfile);
exit(EXIT_FAILURE);
}
fclose(f);
int number = (buffer[0] << CHAR_BIT) | buffer[1];
// Changed: be independent of the endianness of the system.
// This doesn't matter for random number generators but is
// still an important coding style.
printf("DEBUG: random number: %x\n", (unsigned int) number);
return number;
}
void put_buffer( int* buffer, int* in, int* out ) {
buffer[*in] = rand_num_gen(); // produce
procure(&vars.sem); // wait here
*in = (*in + 1) % BUF_SIZE;
vacate(&vars.sem);
}
void get_buffer( int* buffer, int* in, int* out ) {
int value;
procure(&vars.sem);
value = buffer[*out];
vacate(&vars.sem);
*out = (*out + 1) % BUF_SIZE;
}
int main (void) {
int inindex = 0, outindex = 0;
int buffer[BUF_SIZE];
constructor(buffer, &inindex, &outindex);
// Changed: provided an actual buffer and actual variables
// for the indices into the buffer.
dispatch_queue_t producer, consumer;
producer = dispatch_queue_create("put_buffer", NULL);
consumer = dispatch_queue_create("get_buffer", NULL);
dispatch_async(producer, ^{
int i;
do {
put_buffer(buffer, &inindex, &outindex);
dispatch_async(consumer, ^{
get_buffer(buffer, &inindex, &outindex);
if (i == RUN_LENGTH) exit(EXIT_SUCCESS);
});
} while (i < RUN_LENGTH);
});
dispatch_main();
deconstructor();
exit (0);
}
As I said, I didn't catch all the bugs.

Resources