Some threads never get execution when invoked in large amount - c

Consider the following program,
static long count = 0;
void thread()
{
printf("%d\n",++count);
}
int main()
{
pthread_t t;
sigset_t set;
int i,limit = 30000;
struct rlimit rlim;
getrlimit(RLIMIT_NPROC, &rlim);
rlim.rlim_cur = rlim.rlim_max;
setrlimit(RLIMIT_NPROC, &rlim);
for(i=0; i<limit; i++) {
if(pthread_create(&t,NULL,(void *(*)(void*))thread, NULL) != 0) {
printf("thread creation failed\n");
return -1;
}
}
sigemptyset(&set);
sigsuspend(&set);
return 0;
}
This program is expected to print 1 to 30000. But it some times prints 29945, 29999, 29959, etc. Why this is happening?

Because count isn't atomic, so you have a race condition both in the increment and in the subsequent print.
The instruction you need is atomic_fetch_add, to increment the counter and avoid the race condition. The example on cppreference illustrates the exact problem you laid out.
Your example can be made to work with just a minor adjustment:
#include <stdio.h>
#include <signal.h>
#include <sys/resource.h>
#include <pthread.h>
#include <stdatomic.h>
static atomic_long count = 1;
void * thread(void *data)
{
printf("%ld\n", atomic_fetch_add(&count, 1));
return NULL;
}
int main()
{
pthread_t t;
sigset_t set;
int i,limit = 30000;
struct rlimit rlim;
getrlimit(RLIMIT_NPROC, &rlim);
rlim.rlim_cur = rlim.rlim_max;
setrlimit(RLIMIT_NPROC, &rlim);
for(i=0; i<limit; i++) {
if(pthread_create(&t, NULL, thread, NULL) != 0) {
printf("thread creation failed\n");
return -1;
}
}
sigemptyset(&set);
sigsuspend(&set);
return 0;
}
I made a handful of other changes, such as fixing the thread function signature and using the correct printf format for printing longs. But the atomic issue is why you weren't printing all the numbers you expected.

Why this is happening?
Because you have a data race (undefined behavior).
In particular, this statement:
printf("%d\n",++count);
modifies a global (shared) variable without any locking. Since the ++ does not atomically increment it, it's quite possible for multiple threads to read the same value (say 1234), increment it, and store the updated value in parallel, resulting in 1235 being printed repeatedly (two or more times), and one or more of the increments being lost.
A typical solution is to either use mutex to avoid the data race, or (rarely) an atomic variable (which guarantees atomic increment). Beware: atomic variables are quite hard to get right. You are not ready to use them yet.

Related

How to solve the dining philosophers problem with only mutexes?

I wrote this program to solve the dining philosophers problem using Dijkstra's algorithm, notice that I'm using an array of booleans (data->locked) instead of an array of binary semaphores.
I'm not sure if this solution is valid (hence the SO question).
Will access to the data->locked array in both test and take_forks functions cause data races? if so is it even possible to solve this problem using Dijkstra's algorithm with only mutexes?
I'm only allowed to use mutexes, no semaphores, no condition variables (it's an assignment).
Example of usage:
./a.out 4 1000 1000
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <stdbool.h>
#define NOT_HUNGRY 1
#define HUNGRY 2
#define EATING 3
#define RIGHT ((i + 1) % data->n)
#define LEFT ((i + data->n - 1) % data->n)
typedef struct s_data
{
int n;
int t_sleep;
int t_eat;
int *state;
bool *locked;
pthread_mutex_t *state_mutex;
} t_data;
typedef struct s_arg
{
t_data *data;
int i;
} t_arg;
int ft_min(int a, int b)
{
if (a < b)
return (a);
return (b);
}
int ft_max(int a, int b)
{
if (a > b)
return (a);
return (b);
}
// if the LEFT and RIGHT threads are not eating
// and thread number i is hungry, change its state to EATING
// and signal to the while loop in `take_forks` to stop blocking.
// if a thread has a state of HUNGRY then it's guaranteed
// to be out of the critical section of `take_forks`.
void test(int i, t_data *data)
{
if (
data->state[i] == HUNGRY
&& data->state[LEFT] != EATING
&& data->state[RIGHT] != EATING
)
{
data->state[i] = EATING;
data->locked[i] = false;
}
}
// set the state of the thread number i to HUNGRY
// and block until the LEFT and RIGHT threads are not EATING
// in which case they will call `test` from `put_forks`
// which will result in breaking the while loop
void take_forks(int i, t_data *data)
{
pthread_mutex_lock(data->state_mutex);
data->locked[i] = true;
data->state[i] = HUNGRY;
test(i, data);
pthread_mutex_unlock(data->state_mutex);
while (data->locked[i]);
}
// set the state of the thread number i to NOT_HUNGRY
// then signal to the LEFT and RIGHT threads
// so they can start eating when their neighbors are not eating
void put_forks(int i, t_data *data)
{
pthread_mutex_lock(data->state_mutex);
data->state[i] = NOT_HUNGRY;
test(LEFT, data);
test(RIGHT, data);
pthread_mutex_unlock(data->state_mutex);
}
void *philosopher(void *_arg)
{
t_arg *arg = _arg;
while (true)
{
printf("%d is thinking\n", arg->i);
take_forks(arg->i, arg->data);
printf("%d is eating\n", arg->i);
usleep(arg->data->t_eat * 1000);
put_forks(arg->i, arg->data);
printf("%d is sleeping\n", arg->i);
usleep(arg->data->t_sleep * 1000);
}
return (NULL);
}
void data_init(t_data *data, pthread_mutex_t *state_mutex, char **argv)
{
int i = 0;
data->n = atoi(argv[1]);
data->t_eat = atoi(argv[2]);
data->t_sleep = atoi(argv[3]);
pthread_mutex_init(state_mutex, NULL);
data->state_mutex = state_mutex;
data->state = malloc(data->n * sizeof(int));
data->locked = malloc(data->n * sizeof(bool));
while (i < data->n)
{
data->state[i] = NOT_HUNGRY;
data->locked[i] = true;
i++;
}
}
int main(int argc, char **argv)
{
pthread_mutex_t state_mutex;
t_data data;
t_arg *args;
pthread_t *threads;
int i;
if (argc != 4)
{
fputs("Error\nInvalid argument count\n", stderr);
return (1);
}
data_init(&data, &state_mutex, argv);
args = malloc(data.n * sizeof(t_arg));
i = 0;
while (i < data.n)
{
args[i].data = &data;
args[i].i = i;
i++;
}
threads = malloc(data.n * sizeof(pthread_t));
i = 0;
while (i < data.n)
{
pthread_create(threads + i, NULL, philosopher, args + i);
i++;
}
i = 0;
while (i < data.n)
pthread_join(threads[i++], NULL);
}
Your spin loop while (data->locked[i]); is a data race; you don't hold the lock while reading it data->locked[i], and so another thread could take the lock and write to that same variable while you are reading it. In fact, you rely on that happening. But this is undefined behavior.
Immediate practical consequences are that the compiler can delete the test (since in the absence of a data race, data->locked[i] could not change between iterations), or delete the loop altogether (since it's now an infinite loop, and nontrivial infinite loops are UB). Of course other undesired outcomes are also possible.
So you have to hold the mutex while testing the flag. If it's false, you should then hold the mutex until you set it true and do your other work; otherwise there is a race where another thread could get it first. If it's true, then drop the mutex, wait a little while, take it again, and retry.
(How long is a "little while", and what work you choose to do in between, are probably things you should test. Depending on what kind of fairness algorithms your pthread implementation uses, you might run into situations where take_forks succeeds in retaking the lock even if put_forks is also waiting to lock it.)
Of course, in a "real" program, you wouldn't do it this way in the first place; you'd use a condition variable.

Unlocks all pthread mutexes in mutex array

Im trying to write a function that unlocks all pthread mutexes provided in an array of mutexes.
The array is mutexv and the number of mutexes in given by mutexc.
The function should return 0 on success,
-1 otherwise.
my function so far:
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <alloca.h>
#include "pthread.h"
#include "multi_mutex.h"
int multi_mutex_unlock(pthread_mutex_t **mutexv, int mutexc)
{
(void) mutexv;
(void) mutexc;
pthread_mutex_init(*mutexv, NULL);
for (int i=0; i<mutexc; i++){
if (pthread_mutex_unlock(*mutexv) !=0){
return -1;
}
}
return 0;
}
having a hard time figuring out what im doing wrong.
// correct type for specifying array sizes is size_t, not int:
int multi_mutex_unlock(pthread_mutex_t **mutexv, size_t mutexc)
{
// you wouldn't initialize here, that needs to occur much earlier
//pthread_mutex_init(*mutexv, NULL);
for (size_t i = 0; i < mutexc; i++)
{
if (pthread_mutex_unlock(mutexv[i]) != 0)
// you need to index properly ^^^
{
return -1;
}
}
return 0;
}
Actually a while loop can be more elegant:
int multi_mutex_unlock(pthread_mutex_t **mutexv, size_t mutexc)
{
while(mutexc)
{
if (pthread_mutex_unlock(*mutexv) != 0)
{
return -1;
}
mutexc--; // decrement the remaining number
mutexv++; // increment the pointer to point to next mutex
}
return 0;
// or totally compact as:
for(; mutexc; --mutexc, ++mutexv)
{
if (pthread_mutex_unlock(*mutexv) != 0)
{
return -1;
}
}
}
Finally: You don't give any information on how many mutexes actually could be unlocked (or alternatively, how many have not) – you might return that number instead of -1, then any value different from originally passed mutexc would mean an error occurred.

How to modify structure elements atomically without using locks in C?

I would like to modify some elements of a structure atomically.
My current implementation uses mutexes to protect the critical code, and can be seen below.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
pthread_mutex_t thread_mutex = PTHREAD_MUTEX_INITIALIZER;
#define ITER 100000
typedef struct global_status {
int32_t context_delta;
uint32_t global_access_count;
} global_status_t;
global_status_t g_status;
void *context0(void *ptr)
{
unsigned int iter = ITER;
while (iter--) {
wait_event_from_device0();
pthread_mutex_lock(&thread_mutex);
g_status.context_delta++;
g_status.global_access_count++;
pthread_mutex_unlock(&thread_mutex);
}
return NULL;
}
void *context1(void *ptr)
{
unsigned int iter = ITER;
while (iter--) {
wait_event_from_device1();
pthread_mutex_lock(&thread_mutex);
g_status.context_delta--;
g_status.global_access_count++;
pthread_mutex_unlock(&thread_mutex);
}
return NULL;
}
int main(int argc, char **argv)
{
pthread_t tid0, tid1;
int iret;
if ((iret = pthread_create(&tid0, NULL, context0, NULL))) {
fprintf(stderr, "context0 creation error!\n");
return EXIT_FAILURE;
}
if ((iret = pthread_create(&tid1, NULL, context1, NULL))) {
fprintf(stderr, "context1 creation error!\n");
return EXIT_FAILURE;
}
pthread_join(tid0, NULL);
pthread_join(tid1, NULL);
printf("%d, %d\n", g_status.context_delta, g_status.global_access_count);
return 0;
}
I am planning to port this code into an RTOS which does not support posix, and I would like to do this operation atomically without using mutexes or disabling/enabling interrupts.
How can I do this operation?
Is it possible by using 'atomic compare and swap function' (CAS)?
Seems like in your example you have two threads servicing to different devices. You maybe able to do away with locking completely using a per-device structure. The global will be the aggregate of all per-device statistics. If you do need locks you can use CAS, LL/SC or any supported underlying atomic construct.
What i do is create a union with all the fields I want to change at the same time. like this:
union {
struct {
int m_field1;
unsigned short m_field2 : 2,
m_field3 : 1;
BYTE m_field4;
}
unsigned long long m_n64;
TData(const TData& r) { m_n64 = r.m_n64; }
} TData;
You embed unions like that inside your larger struct like this:
struct {
...
volatile TData m_Data;
...
} TBiggerStruct;
Then i do something like this:
while (1) {
TData Old = BiggerSharedStruct.m_Data, New = Old;
New.field1++;
New.field4--;
if (CAS(&SharedData.m_n64, Old.m_n64, New.m_n64))
break; // success
}
I do a lot of packing of fields that I want to change at the same time into the smallest possible 16, 32, or 64 bit structure. I think 128 bit stuff on intel is not as fast as the 64 bit stuff, so I avoid it. I haven't benchmarked it in awhile so I could be wrong on that.

Deallocating memory in multi-threaded environment

I'm having a hard time figuring out how to manage deallocation of memory in multithreaded environments. Specifically what I'm having a hard time with is using a lock to protect a structure, but when it's time to free the structure, you have to unlock the lock to destroy the lock itself. Which will cause problems if a separate thread is waiting on that same lock that you need to destroy.
I'm trying to come up with a mechanism that has retain counts, and when the object's retain count is 0, it's all freed. I've been trying a number of different things but just can't get it right. As I've been doing this it seems like you can't put the locking mechanism inside of the structure that you need to be able to free and destroy, because that requires you unlock the the lock inside of it, which could allow another thread to proceed if it was blocked in a lock request for that same structure. Which would mean that something undefined is guaranteed to happen - the lock was destroyed, and deallocated so either you get memory access errors, or you lock on undefined behavior..
Would someone mind looking at my code? I was able to put together a sandboxed example that demonstrates what I'm trying without a bunch of files.
http://pastebin.com/SJC86GDp
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
struct xatom {
short rc;
pthread_rwlock_t * rwlck;
};
typedef struct xatom xatom;
struct container {
xatom * atom;
};
typedef struct container container;
#define nr 1
#define nw 2
pthread_t readers[nr];
pthread_t writers[nw];
container * c;
void retain(container * cont);
void release(container ** cont);
short retain_count(container * cont);
void * rth(void * arg) {
short rc;
while(1) {
if(c == NULL) break;
rc = retain_count(c);
}
printf("rth exit!\n");
return NULL;
}
void * wth(void * arg) {
while(1) {
if(c == NULL) break;
release((container **)&c);
}
printf("wth exit!\n");
return NULL;
}
short retain_count(container * cont) {
short rc = 1;
pthread_rwlock_rdlock(cont->atom->rwlck);
printf("got rdlock in retain_count\n");
rc = cont->atom->rc;
pthread_rwlock_unlock(cont->atom->rwlck);
return rc;
}
void retain(container * cont) {
pthread_rwlock_wrlock(cont->atom->rwlck);
printf("got retain write lock\n");
cont->atom->rc++;
pthread_rwlock_unlock(cont->atom->rwlck);
}
void release(container ** cont) {
if(!cont || !(*cont)) return;
container * tmp = *cont;
pthread_rwlock_t ** lock = (pthread_rwlock_t **)&(*cont)->atom->rwlck;
pthread_rwlock_wrlock(*lock);
printf("got release write lock\n");
if(!tmp) {
printf("return 2\n");
pthread_rwlock_unlock(*lock);
if(*lock) {
printf("destroying lock 1\n");
pthread_rwlock_destroy(*lock);
*lock = NULL;
}
return;
}
tmp->atom->rc--;
if(tmp->atom->rc == 0) {
printf("deallocating!\n");
*cont = NULL;
pthread_rwlock_unlock(*lock);
if(pthread_rwlock_trywrlock(*lock) == 0) {
printf("destroying lock 2\n");
pthread_rwlock_destroy(*lock);
*lock = NULL;
}
free(tmp->atom->rwlck);
free(tmp->atom);
free(tmp);
} else {
pthread_rwlock_unlock(*lock);
}
}
container * new_container() {
container * cont = malloc(sizeof(container));
cont->atom = malloc(sizeof(xatom));
cont->atom->rwlck = malloc(sizeof(pthread_rwlock_t));
pthread_rwlock_init(cont->atom->rwlck,NULL);
cont->atom->rc = 1;
return cont;
}
int main(int argc, char ** argv) {
c = new_container();
int i = 0;
int l = 4;
for(i=0;i<l;i++) retain(c);
for(i=0;i<nr;i++) pthread_create(&readers[i],NULL,&rth,NULL);
for(i=0;i<nw;i++) pthread_create(&writers[i],NULL,&wth,NULL);
sleep(2);
for(i=0;i<nr;i++) pthread_join(readers[i],NULL);
for(i=0;i<nw;i++) pthread_join(writers[i],NULL);
return 0;
}
Thanks for any help!
Yes, you can't put the key inside the safe. Your approach with refcount (create object when requested and doesn't exist, delete on last release) is correct. But the lock must exist at least a moment before object is created and after it is destroyed - that is, while it is used. You can't delete it from inside of itself.
OTOH, you don't need countless locks, like one for each object you create. One lock that excludes obtaining and releasing of all objects will not create much performance loss at all. So just create the lock on init and destroy on program end. Otaining/releasing an object should take short enough that lock on variable A blocking access to unrelated variable B should almost never happen. If it happens - you can still introduce one lock per all rarely obtained variables and one per each frequently obtained one.
Also, there seems to be no point for rwlock, plain mutex suffices, and the create/destroy operations MUST exclude each other, not just parallel instances of themselves - so use pthread_create_mutex() family instead.

Avoiding starvation when attempting to use a many-to-many implementation

I am trying to grant access to a shared resource to two types of threads. It can be accessed by more than one threads, if, and only if, that thread is of the same type. Let us consider blacks & whites. When the resource is used by whites, it cannot be used by blacks and vice-versa.
I attempted to implement this using semaphores. Once a black tries to access the resource, it will increment the number of blacks and if that number is 1, it will block the whites from accessing it.
Issue: there is a noticeable starvation when there are more than 1 thread of each type (in my case threads with id 0 never used it). I attempted to fix this by adding an extra semaphore to serve as a queue.
Observation: this resembles very well to the readers-writers problem, except there is a many to many access criteria. (it can be used by multiple threads of the same type) I have been bashing my head quite a lot around this problem lately and I cannot seem to understand how should I approach this.
Now, for some code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <wait.h>
#include <pthread.h>
#include <semaphore.h>
#define MAX_RAND 100
#define TRUE 1
#define FALSE 0
#define WHITES 3
#define BLACKS 2
#define MAX_WORLOAD 10
sem_t semaphore;
sem_t resource_semaphore;
sem_t service_queue;
volatile int resource = 0;
volatile int currentWhites = 0;
volatile int currentBlacks = 0;
typedef struct
{
char *type;
int *id;
} data;
void *white(void *args)
{
data *thread_data = (data *)args;
int id = *(thread_data->id);
char *type = thread_data->type;
for (int i = 0; i < MAX_WORLOAD; i++)
{
sem_wait(&service_queue);
sem_wait(&semaphore);
sem_post(&service_queue);
currentWhites++;
if (currentWhites == 1)
{
sem_wait(&resource_semaphore);
}
sem_post(&semaphore);
sem_wait(&semaphore);
currentBlacks--;
resource = rand() % MAX_RAND;
printf("Thread %d of type %s has updated resource to %d\n\n", id, type, resource);
if (currentWhites == 0)
{
sem_post(&resource_semaphore);
}
sem_post(&semaphore);
}
}
void *black(void *args)
{
data *thread_data = (data *)args;
int id = *(thread_data->id);
char *type = thread_data->type;
for (int i = 0; i < MAX_WORLOAD; i++)
{
sem_wait(&service_queue);
sem_wait(&semaphore);
sem_post(&service_queue);
currentBlacks++;
if (currentBlacks == 1)
{
sem_wait(&resource_semaphore);
}
sem_post(&semaphore);
sem_wait(&semaphore);
currentBlacks--;
resource = rand() % MAX_RAND;
printf("Thread %d of type %s has updated resource to %d\n\n", id, type, resource);
if (currentBlacks == 0)
{
sem_post(&resource_semaphore);
}
sem_post(&semaphore);
}
}
data *initialize(pthread_t threads[], int size, char *type)
{
data *args = malloc(sizeof(data) * size);
int *id = malloc(sizeof(int));
void *function;
if (type == "WHITE")
{
function = white;
}
else
{
function = black;
}
for (int i = 0; i < size; i++)
{
*id = i;
args[i].type = type;
args[i].id = id;
printf("Initializing %d of type %s\n", *args[i].id, args[i].type);
pthread_create(&threads[i], NULL, function, (void **)&args[i]);
}
return args;
}
void join(pthread_t threads[], int size)
{
for (int i = 0; i < size; i++)
{
pthread_join(threads[i], NULL);
}
}
void initialize_locks()
{
sem_init(&semaphore, 0, 1);
sem_init(&resource_semaphore, 0, 1);
sem_init(&service_queue, 0, 1);
}
int main()
{
initialize_locks();
pthread_t whites[WHITES];
pthread_t blacks[BLACKS];
char *white = "white";
char *black = "black";
data *whites_arg = initialize(whites, WHITES, white);
data *blacks_arg = initialize(blacks, BLACKS, black);
join(whites, WHITES);
join(blacks, BLACKS);
free(whites_arg);
free(blacks_arg);
return 0;
}
If you want to force alternation between two types of threads accessing a single thing you can use two semaphores. Make it so the blacks and whites each have their own semaphores, start one semaphore with 0 keys and the other with 10 or something, then make it so that the whites release a key to the black semaphore, and the blacks release a key to the white semaphore, this way if you have 10 white threads in, when one of them unlocks you won't be able to put a 10th white thread in, but you will be able to put a black thread in, so that when all of the white threads release their keys you will have no white threads currently accessing the thing.
TL;DR: two semaphores that post to each other instead of themselves will allow alternation between groups, however independent of this operation you need to also make sure that whites don't go while blacks are still in.

Resources