How can I make sure multiple threads in the Linux kernel have completed before proceeding?
See the example code (slightly modified) from a previous question (How to join a thread in Linux kernel?)
void *func(void *arg) {
// doing something
return NULL;
}
int init_module(void) {
struct task_struct* thread[5];
int i;
for (i=0; i<5; i++) {
thread[i] = kthread_run(func, (void*) arg, "TestThread");
wake_up_process(thread[i]);
}
// wait here until all 5 threads are complete
// do something else
return 0;
}
The answer from this previous question is quite detailed (https://stackoverflow.com/a/29961182/7431886), which is great, but it only addresses the scope of the original question (waiting for only a specific one of the threads to finish).
How can I generalize either the semaphore or completion methods detailed in this answer to wait for N threads instead of just a specific one?
After some experimentation, this seems to be the best way to simulate a rudimentary thread join in the kernel. I used the completion method, not the semaphore method since I found it more straightforward.
struct my_thread_data {
struct completion *comp;
... // anything else you want to pass through
};
void *foo(void *arg) {
// doing something
return NULL;
}
int init_module(void) {
struct task_struct *threads[5];
struct completion comps[5];
struct my_thread_data data[5];
int i;
for (i=0; i<5; i++) {
init_completion(comps + i);
data[i].comp = comps + i;
thread[i] = kthread_run(&foo, (void*)(data + i), "ThreadName");
}
// wait here until all 5 threads are complete
for (i=0; i<5; i++) {
wait_for_completion(comps + i);
}
// do something else once threads are complete
return 0;
}
Related
I wrote this program to solve the dining philosophers problem using Dijkstra's algorithm, notice that I'm using an array of booleans (data->locked) instead of an array of binary semaphores.
I'm not sure if this solution is valid (hence the SO question).
Will access to the data->locked array in both test and take_forks functions cause data races? if so is it even possible to solve this problem using Dijkstra's algorithm with only mutexes?
I'm only allowed to use mutexes, no semaphores, no condition variables (it's an assignment).
Example of usage:
./a.out 4 1000 1000
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <stdbool.h>
#define NOT_HUNGRY 1
#define HUNGRY 2
#define EATING 3
#define RIGHT ((i + 1) % data->n)
#define LEFT ((i + data->n - 1) % data->n)
typedef struct s_data
{
int n;
int t_sleep;
int t_eat;
int *state;
bool *locked;
pthread_mutex_t *state_mutex;
} t_data;
typedef struct s_arg
{
t_data *data;
int i;
} t_arg;
int ft_min(int a, int b)
{
if (a < b)
return (a);
return (b);
}
int ft_max(int a, int b)
{
if (a > b)
return (a);
return (b);
}
// if the LEFT and RIGHT threads are not eating
// and thread number i is hungry, change its state to EATING
// and signal to the while loop in `take_forks` to stop blocking.
// if a thread has a state of HUNGRY then it's guaranteed
// to be out of the critical section of `take_forks`.
void test(int i, t_data *data)
{
if (
data->state[i] == HUNGRY
&& data->state[LEFT] != EATING
&& data->state[RIGHT] != EATING
)
{
data->state[i] = EATING;
data->locked[i] = false;
}
}
// set the state of the thread number i to HUNGRY
// and block until the LEFT and RIGHT threads are not EATING
// in which case they will call `test` from `put_forks`
// which will result in breaking the while loop
void take_forks(int i, t_data *data)
{
pthread_mutex_lock(data->state_mutex);
data->locked[i] = true;
data->state[i] = HUNGRY;
test(i, data);
pthread_mutex_unlock(data->state_mutex);
while (data->locked[i]);
}
// set the state of the thread number i to NOT_HUNGRY
// then signal to the LEFT and RIGHT threads
// so they can start eating when their neighbors are not eating
void put_forks(int i, t_data *data)
{
pthread_mutex_lock(data->state_mutex);
data->state[i] = NOT_HUNGRY;
test(LEFT, data);
test(RIGHT, data);
pthread_mutex_unlock(data->state_mutex);
}
void *philosopher(void *_arg)
{
t_arg *arg = _arg;
while (true)
{
printf("%d is thinking\n", arg->i);
take_forks(arg->i, arg->data);
printf("%d is eating\n", arg->i);
usleep(arg->data->t_eat * 1000);
put_forks(arg->i, arg->data);
printf("%d is sleeping\n", arg->i);
usleep(arg->data->t_sleep * 1000);
}
return (NULL);
}
void data_init(t_data *data, pthread_mutex_t *state_mutex, char **argv)
{
int i = 0;
data->n = atoi(argv[1]);
data->t_eat = atoi(argv[2]);
data->t_sleep = atoi(argv[3]);
pthread_mutex_init(state_mutex, NULL);
data->state_mutex = state_mutex;
data->state = malloc(data->n * sizeof(int));
data->locked = malloc(data->n * sizeof(bool));
while (i < data->n)
{
data->state[i] = NOT_HUNGRY;
data->locked[i] = true;
i++;
}
}
int main(int argc, char **argv)
{
pthread_mutex_t state_mutex;
t_data data;
t_arg *args;
pthread_t *threads;
int i;
if (argc != 4)
{
fputs("Error\nInvalid argument count\n", stderr);
return (1);
}
data_init(&data, &state_mutex, argv);
args = malloc(data.n * sizeof(t_arg));
i = 0;
while (i < data.n)
{
args[i].data = &data;
args[i].i = i;
i++;
}
threads = malloc(data.n * sizeof(pthread_t));
i = 0;
while (i < data.n)
{
pthread_create(threads + i, NULL, philosopher, args + i);
i++;
}
i = 0;
while (i < data.n)
pthread_join(threads[i++], NULL);
}
Your spin loop while (data->locked[i]); is a data race; you don't hold the lock while reading it data->locked[i], and so another thread could take the lock and write to that same variable while you are reading it. In fact, you rely on that happening. But this is undefined behavior.
Immediate practical consequences are that the compiler can delete the test (since in the absence of a data race, data->locked[i] could not change between iterations), or delete the loop altogether (since it's now an infinite loop, and nontrivial infinite loops are UB). Of course other undesired outcomes are also possible.
So you have to hold the mutex while testing the flag. If it's false, you should then hold the mutex until you set it true and do your other work; otherwise there is a race where another thread could get it first. If it's true, then drop the mutex, wait a little while, take it again, and retry.
(How long is a "little while", and what work you choose to do in between, are probably things you should test. Depending on what kind of fairness algorithms your pthread implementation uses, you might run into situations where take_forks succeeds in retaking the lock even if put_forks is also waiting to lock it.)
Of course, in a "real" program, you wouldn't do it this way in the first place; you'd use a condition variable.
Consider the following program,
static long count = 0;
void thread()
{
printf("%d\n",++count);
}
int main()
{
pthread_t t;
sigset_t set;
int i,limit = 30000;
struct rlimit rlim;
getrlimit(RLIMIT_NPROC, &rlim);
rlim.rlim_cur = rlim.rlim_max;
setrlimit(RLIMIT_NPROC, &rlim);
for(i=0; i<limit; i++) {
if(pthread_create(&t,NULL,(void *(*)(void*))thread, NULL) != 0) {
printf("thread creation failed\n");
return -1;
}
}
sigemptyset(&set);
sigsuspend(&set);
return 0;
}
This program is expected to print 1 to 30000. But it some times prints 29945, 29999, 29959, etc. Why this is happening?
Because count isn't atomic, so you have a race condition both in the increment and in the subsequent print.
The instruction you need is atomic_fetch_add, to increment the counter and avoid the race condition. The example on cppreference illustrates the exact problem you laid out.
Your example can be made to work with just a minor adjustment:
#include <stdio.h>
#include <signal.h>
#include <sys/resource.h>
#include <pthread.h>
#include <stdatomic.h>
static atomic_long count = 1;
void * thread(void *data)
{
printf("%ld\n", atomic_fetch_add(&count, 1));
return NULL;
}
int main()
{
pthread_t t;
sigset_t set;
int i,limit = 30000;
struct rlimit rlim;
getrlimit(RLIMIT_NPROC, &rlim);
rlim.rlim_cur = rlim.rlim_max;
setrlimit(RLIMIT_NPROC, &rlim);
for(i=0; i<limit; i++) {
if(pthread_create(&t, NULL, thread, NULL) != 0) {
printf("thread creation failed\n");
return -1;
}
}
sigemptyset(&set);
sigsuspend(&set);
return 0;
}
I made a handful of other changes, such as fixing the thread function signature and using the correct printf format for printing longs. But the atomic issue is why you weren't printing all the numbers you expected.
Why this is happening?
Because you have a data race (undefined behavior).
In particular, this statement:
printf("%d\n",++count);
modifies a global (shared) variable without any locking. Since the ++ does not atomically increment it, it's quite possible for multiple threads to read the same value (say 1234), increment it, and store the updated value in parallel, resulting in 1235 being printed repeatedly (two or more times), and one or more of the increments being lost.
A typical solution is to either use mutex to avoid the data race, or (rarely) an atomic variable (which guarantees atomic increment). Beware: atomic variables are quite hard to get right. You are not ready to use them yet.
In the code below I wrote a program to perform add/remove operations on an int array using multithreading. The condition is that multiple threads cannot make operations on the same cell, but parallel operations can be made on different cells.
I thought in order to implement such conditions I'd need to use multiple mutexes and condition variables, to be exact, as many as there're cells in the array. The initial value of all cells of my array is 10 and threads increment/decrement this value by 3.
The code below seems to work (the cell values of the array after all threads finished working is as expected) but I don't understand a few things:
I first spawn adder threads which sleep for a second. In addition each thread has printf statement which is triggered if a thread waits. Remove threads don't sleep so I expect remove threads to invoke their printf statements because they must wait a second at least before adder threads finish their work. But remover threads never call printf.
My second concern: as I mentioned I first spawn adder threads so I expect the cells value go from 10 to 13. Then if remover thread acquires lock the value can go from 13 to 10 OR if adder thread acquires the lock then the cell value will go from 13 to 16. But I don't see the behavior in printf statements inside threads. For example one of the printf sequences I had: add thread id and cell id 1: cell value 10->13, then remove thread id and cell id 1: cell value 10->7 then add thread id and cell id 1: cell value 10->13. This doesn't make sense. I made sure that the threads all point to the same array.
Bottom line I'd like to know whether my solution is correct and if yes why is the behavior I described occurring. If my solution is incorrect I'd appreciate example of correct solution or at least general direction.
This is the code (all the logic is in AdderThread, RemoveThread):
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#define ARR_LEN 5
#define THREADS_NUM 5
#define INIT_VAL 10
#define ADD_VAL 3
#define REMOVE_VAL 3
#define ADDER_LOOPS 2
typedef struct helper_t {
int threadId;
int * arr;
int * stateArr; //0 if free, 1 if busy
} helper_t;
enum STATE {FREE, BUSY};
enum ERRORS {MUTEX, COND, CREATE, JOIN, LOCK, UNLOCK, WAIT, BROADCAST};
pthread_mutex_t mutexArr[THREADS_NUM];
pthread_cond_t condArr[THREADS_NUM];
void errorHandler(int errorId) {
switch (errorId) {
case MUTEX:
printf("mutex error\n");
break;
case COND:
printf("cond error\n");
break;
case CREATE:
printf("create error\n");
break;
case JOIN:
printf("join error\n");
break;
case LOCK:
printf("lock error\n");
break;
case UNLOCK:
printf("unlock error\n");
break;
case WAIT:
printf("wait error\n");
break;
case BROADCAST:
printf("broadcast error\n");
break;
default:
printf("default switch\n");
break;
}
}
void mallocError() {
printf("malloc error\nExiting app\n");
exit(EXIT_FAILURE);
}
void initMutexesAndConds(pthread_mutex_t * mutexArr, pthread_cond_t * condArr) {
int i;
for(i = 0; i < THREADS_NUM; i++) {
pthread_mutex_init(&mutexArr[i], NULL);
pthread_cond_init(&condArr[i], NULL);
}
}
helper_t * initStructs(int * arr, int * stateArr) {
int i;
helper_t * helpers = (helper_t *) malloc(sizeof(helper_t) * THREADS_NUM);
if(!helpers) {
mallocError();
} else {
for(i = 0; i < THREADS_NUM; i++) {
helpers[i].threadId = i;
helpers[i].arr = arr;
helpers[i].stateArr = stateArr;
}
}
return helpers;
}
void printArr(int * arr, int len) {
int i;
for(i = 0; i < len; i++) {
printf("%d, ", arr[i]);
}
printf("\n");
}
void * AdderThread(void * arg) {
int i;
helper_t * h = (helper_t *) arg;
int id = h->threadId;
for(i = 0; i < ADDER_LOOPS; i++) {
pthread_mutex_t * mutex = &mutexArr[id];
pthread_cond_t * cond = &condArr[id];
if(pthread_mutex_lock(mutex)) {
errorHandler(LOCK);
}
while(h->stateArr[id] == BUSY) {
printf("adder id %d waiting...\n", id);
if(pthread_cond_wait(cond, mutex)) {
errorHandler(WAIT);
}
}
h->stateArr[id] = BUSY;
sleep(1);
h->arr[id] = h->arr[id] + ADD_VAL;
printf("add thread id and cell id %d: cell value %d->%d\n", id, h->arr[id]-ADD_VAL, h->arr[id]);
h->stateArr[id] = FREE;
if(pthread_cond_broadcast(cond)) {
errorHandler(BROADCAST);
}
if(pthread_mutex_unlock(mutex)) {
errorHandler(UNLOCK);
}
}
pthread_exit(NULL);
}
void * RemoveThread(void * arg) {
helper_t * h = (helper_t *) arg;
int id = h->threadId;
pthread_mutex_t * mutex = &mutexArr[id];
pthread_cond_t * cond = &condArr[id];
if(pthread_mutex_lock(mutex)) {
errorHandler(LOCK);
}
while(h->stateArr[id] == BUSY) {
printf("remover id %d waiting...\n", id);
if(pthread_cond_wait(cond, mutex)) {
errorHandler(WAIT);
}
}
h->stateArr[id] = BUSY;
h->arr[id] = h->arr[id] - REMOVE_VAL;
printf("remove thread id and cell id %d: cell value %d->%d\n", id, h->arr[id], h->arr[id]-ADD_VAL);
h->stateArr[id] = FREE;
if(pthread_cond_broadcast(cond)) {
errorHandler(BROADCAST);
}
if(pthread_mutex_unlock(mutex)) {
errorHandler(UNLOCK);
}
pthread_exit(NULL);
}
int main() {
int i;
helper_t * adderHelpers;
helper_t * removeHelpers;
pthread_t adders[THREADS_NUM];
pthread_t removers[THREADS_NUM];
int * arr = (int *) malloc(sizeof(int) * ARR_LEN);
int * stateArr = (int *) malloc(sizeof(int) * ARR_LEN);
if(!arr || !stateArr) {
mallocError();
}
for(i = 0; i < ARR_LEN; i++) {
arr[i] = INIT_VAL;
stateArr[i] = FREE;
}
initMutexesAndConds(mutexArr, condArr);
adderHelpers = initStructs(arr, stateArr);
removeHelpers = initStructs(arr, stateArr);
for(i = 0; i < THREADS_NUM; i++) {
pthread_create(&adders[i], NULL, AdderThread, &adderHelpers[i]);
pthread_create(&removers[i], NULL, RemoveThread, &removeHelpers[i]);
}
for(i = 0; i < THREADS_NUM; i++) {
pthread_join(adders[i], NULL);
pthread_join(removers[i], NULL);
}
printf("the results are:\n");
printArr(arr, THREADS_NUM);
printf("DONE.\n");
return 0;
}
1) This code sequence in Addr:
h->stateArr[id] = BUSY;
sleep(1);
h->arr[id] = h->arr[id] + ADD_VAL;
printf("add thread id and cell id %d: cell value %d->%d\n", id, h->arr[id]-ADD_VAL, h->arr[id]);
h->stateArr[id] = FREE;
Is execute with the mutex locked; thus Remove would never get a chance to see the state as anything but FREE.
2) There is no guarantee that mutex ownership alternates (afaik), but at the very least, to properly co-ordinate threads you should never rely upon such an implementation detail. It is the difference between working and “happens to work”, which usually leads to “used to work”....
If you put the sleep() between the mutex unlock and mutex lock, you might have a better case, but as it is, it just unlocks it then locks it again, so the system is well within its rights to just let it continue executing.
[ I ran out of space in comments ... ]:
Yes, the condition variables are doing nothing for you here. The idea of a condition variable is to be able to be notified when a significant event, such as a state change, has occurred on some shared objection.
For example, a reservoir might have a single condition variable for the water level. Multiplexed onto that might be many conditions: level < 1m; level > 5m; level > 10m. To keep the systems independent (thus working), the bit that updates the level might just:
pthread_mutex_lock(&levellock);
level = x;
pthread_cond_broadcast(&newlevel);
pthread_mutex_unlock(&levellock);
The actors implementing the conditions would do something like:
pthread_mutex_lock(&levellock);
while (1) {
if (level is my conditions) {
pthread_mutex_unlock(&levellock);
alert the media
pthread_mutex_lock(&levellock);
}
pthread_cond_wait(&newlevel, &levellock);
}
Thus I can add many “condition monitors” without breaking the level setting code, or the overall system. Many is finite, but by releasing the mutex while I alert the media, I avoid having my water monitoring system rely on the alarm handling.
If you are familiar with “publish/subscribe”, you might find this familiar. This is fundamentally the same model, just the PS hides a pile of details.
I am developing a userspace premptive thread library(fibre) that uses context switching as the base approach. For this I wrote a scheduler. However, its not performing as expected. Can I have any suggestions for this.
The structure of the thread_t used is :
typedef struct thread_t {
int thr_id;
int thr_usrpri;
int thr_cpupri;
int thr_totalcpu;
ucontext_t thr_context;
void * thr_stack;
int thr_stacksize;
struct thread_t *thr_next;
struct thread_t *thr_prev;
} thread_t;
The scheduling function is as follows:
void schedule(void)
{
thread_t *t1, *t2;
thread_t * newthr = NULL;
int newpri = 127;
struct itimerval tm;
ucontext_t dummy;
sigset_t sigt;
t1 = ready_q;
// Select the thread with higest priority
while (t1 != NULL)
{
if (newpri > t1->thr_usrpri + t1->thr_cpupri)
{
newpri = t1->thr_usrpri + t1->thr_cpupri;
newthr = t1;
}
t1 = t1->thr_next;
}
if (newthr == NULL)
{
if (current_thread == NULL)
{
// No more threads? (stop itimer)
tm.it_interval.tv_usec = 0;
tm.it_interval.tv_sec = 0;
tm.it_value.tv_usec = 0; // ZERO Disable
tm.it_value.tv_sec = 0;
setitimer(ITIMER_PROF, &tm, NULL);
}
return;
}
else
{
// TO DO :: Reenabling of signals must be done.
// Switch to new thread
if (current_thread != NULL)
{
t2 = current_thread;
current_thread = newthr;
timeq = 0;
sigemptyset(&sigt);
sigaddset(&sigt, SIGPROF);
sigprocmask(SIG_UNBLOCK, &sigt, NULL);
swapcontext(&(t2->thr_context), &(current_thread->thr_context));
}
else
{
// No current thread? might be terminated
current_thread = newthr;
timeq = 0;
sigemptyset(&sigt);
sigaddset(&sigt, SIGPROF);
sigprocmask(SIG_UNBLOCK, &sigt, NULL);
swapcontext(&(dummy), &(current_thread->thr_context));
}
}
}
It seems that the "ready_q" (head of the list of ready threads?) never changes, so the search of the higest priority thread always finds the first suitable element. If two threads have the same priority, only the first one has a chance to gain the CPU. There are many algorithms you can use, some are based on a dynamic change of the priority, other ones use a sort of rotation inside the ready queue. In your example you could remove the selected thread from its place in the ready queue and put in at the last place (it's a double linked list, so the operation is trivial and quite inexpensive).
Also, I'd suggest you to consider the performace issues due to the linear search in ready_q, since it may be a problem when the number of threads is big. In that case it may be helpful a more sophisticated structure, with different lists of threads for different levels of priority.
Bye!
I am trying to grant access to a shared resource to two types of threads. It can be accessed by more than one threads, if, and only if, that thread is of the same type. Let us consider blacks & whites. When the resource is used by whites, it cannot be used by blacks and vice-versa.
I attempted to implement this using semaphores. Once a black tries to access the resource, it will increment the number of blacks and if that number is 1, it will block the whites from accessing it.
Issue: there is a noticeable starvation when there are more than 1 thread of each type (in my case threads with id 0 never used it). I attempted to fix this by adding an extra semaphore to serve as a queue.
Observation: this resembles very well to the readers-writers problem, except there is a many to many access criteria. (it can be used by multiple threads of the same type) I have been bashing my head quite a lot around this problem lately and I cannot seem to understand how should I approach this.
Now, for some code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <wait.h>
#include <pthread.h>
#include <semaphore.h>
#define MAX_RAND 100
#define TRUE 1
#define FALSE 0
#define WHITES 3
#define BLACKS 2
#define MAX_WORLOAD 10
sem_t semaphore;
sem_t resource_semaphore;
sem_t service_queue;
volatile int resource = 0;
volatile int currentWhites = 0;
volatile int currentBlacks = 0;
typedef struct
{
char *type;
int *id;
} data;
void *white(void *args)
{
data *thread_data = (data *)args;
int id = *(thread_data->id);
char *type = thread_data->type;
for (int i = 0; i < MAX_WORLOAD; i++)
{
sem_wait(&service_queue);
sem_wait(&semaphore);
sem_post(&service_queue);
currentWhites++;
if (currentWhites == 1)
{
sem_wait(&resource_semaphore);
}
sem_post(&semaphore);
sem_wait(&semaphore);
currentBlacks--;
resource = rand() % MAX_RAND;
printf("Thread %d of type %s has updated resource to %d\n\n", id, type, resource);
if (currentWhites == 0)
{
sem_post(&resource_semaphore);
}
sem_post(&semaphore);
}
}
void *black(void *args)
{
data *thread_data = (data *)args;
int id = *(thread_data->id);
char *type = thread_data->type;
for (int i = 0; i < MAX_WORLOAD; i++)
{
sem_wait(&service_queue);
sem_wait(&semaphore);
sem_post(&service_queue);
currentBlacks++;
if (currentBlacks == 1)
{
sem_wait(&resource_semaphore);
}
sem_post(&semaphore);
sem_wait(&semaphore);
currentBlacks--;
resource = rand() % MAX_RAND;
printf("Thread %d of type %s has updated resource to %d\n\n", id, type, resource);
if (currentBlacks == 0)
{
sem_post(&resource_semaphore);
}
sem_post(&semaphore);
}
}
data *initialize(pthread_t threads[], int size, char *type)
{
data *args = malloc(sizeof(data) * size);
int *id = malloc(sizeof(int));
void *function;
if (type == "WHITE")
{
function = white;
}
else
{
function = black;
}
for (int i = 0; i < size; i++)
{
*id = i;
args[i].type = type;
args[i].id = id;
printf("Initializing %d of type %s\n", *args[i].id, args[i].type);
pthread_create(&threads[i], NULL, function, (void **)&args[i]);
}
return args;
}
void join(pthread_t threads[], int size)
{
for (int i = 0; i < size; i++)
{
pthread_join(threads[i], NULL);
}
}
void initialize_locks()
{
sem_init(&semaphore, 0, 1);
sem_init(&resource_semaphore, 0, 1);
sem_init(&service_queue, 0, 1);
}
int main()
{
initialize_locks();
pthread_t whites[WHITES];
pthread_t blacks[BLACKS];
char *white = "white";
char *black = "black";
data *whites_arg = initialize(whites, WHITES, white);
data *blacks_arg = initialize(blacks, BLACKS, black);
join(whites, WHITES);
join(blacks, BLACKS);
free(whites_arg);
free(blacks_arg);
return 0;
}
If you want to force alternation between two types of threads accessing a single thing you can use two semaphores. Make it so the blacks and whites each have their own semaphores, start one semaphore with 0 keys and the other with 10 or something, then make it so that the whites release a key to the black semaphore, and the blacks release a key to the white semaphore, this way if you have 10 white threads in, when one of them unlocks you won't be able to put a 10th white thread in, but you will be able to put a black thread in, so that when all of the white threads release their keys you will have no white threads currently accessing the thing.
TL;DR: two semaphores that post to each other instead of themselves will allow alternation between groups, however independent of this operation you need to also make sure that whites don't go while blacks are still in.