I have written a sample linux device driver code which will create two kernel threads and each will increment a single global variable. I have used wait-queues to perform the task of incrementing the variable, and each thread will wait on the wait queue until a timer expires and each thread is woken up at random.
But problem is when I inserted this module, the whole system is just freezing up, and I have to restart the machine. This is happening every time I inserted the module. I tried debugging the kthread code to see if I am entering dead-lock situation by mistake but I am unable to figure out anything wrong with the code.
Can anyone please tell me what I am doing wrong in the code to get the hang-up situation?
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/semaphore.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/kthread.h>
spinlock_t my_si_lock;
pid_t kthread_pid1;
pid_t kthread_pid2 ;
static DECLARE_WAIT_QUEUE_HEAD(wqueue);
static struct timer_list my_timer;
int kthread_num;
/* the timer callback */
void my_timer_callback( unsigned long data ){
printk(KERN_INFO "my_timer_callback called (%ld).\n", jiffies );
if (waitqueue_active(&wqueue)) {
wake_up_interruptible(&wqueue);
}
}
/*Routine for the first thread */
static int kthread_routine_1(void *kthread_num)
{
//int num=(int)(*(int*)kthread_num);
int *num=(int *)kthread_num;
char kthread_name[15];
unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
printk(KERN_INFO "Inside daemon_routine() %ld\n",current->pid);
allow_signal(SIGKILL);
allow_signal(SIGTERM);
do{
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
spin_lock_irqsave(&my_si_lock, flags);
printk(KERN_INFO "kernel_daemon [%d] incrementing the shared data=%d\n",current->pid,(*num)++);
spin_unlock_irqrestore(&my_si_lock, flags);
remove_wait_queue(&wqueue, &wait);
if (kthread_should_stop()) {
break;
}
}while(!signal_pending(current));
set_current_state(TASK_RUNNING);
return 0;
}
/*Routine for the second thread */
static int kthread_routine_2(void *kthread_num)
{
//int num=(int)(*(int*)kthread_num);
int *num=(int *)kthread_num;
char kthread_name[15];
unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
printk(KERN_INFO "Inside daemon_routine() %ld\n",current->pid);
allow_signal(SIGKILL);
allow_signal(SIGTERM);
do{
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
spin_lock_irqsave(&my_si_lock, flags);
printk(KERN_INFO "kernel_daemon [%d] incrementing the shared data=%d\n",current->pid,(*num)++);
spin_unlock_irqrestore(&my_si_lock, flags);
remove_wait_queue(&wqueue, &wait);
if (kthread_should_stop()) {
break;
}
}while(!signal_pending(current));
set_current_state(TASK_RUNNING);
return 0;
}
static int __init signalexample_module_init(void)
{
int ret;
spin_lock_init(&my_si_lock);
init_waitqueue_head(&wqueue);
kthread_num=1;
printk(KERN_INFO "starting the first kernel thread with id ");
kthread_pid1 = kthread_run(kthread_routine_1,&kthread_num,"first_kthread");
printk(KERN_INFO "%ld \n",(long)kthread_pid1);
if(kthread_pid1< 0 ){
printk(KERN_ALERT "Kernel thread [1] creation failed\n");
return -1;
}
printk(KERN_INFO "starting the second kernel thread with id");
kthread_pid2 = kthread_run(kthread_routine_2,&kthread_num,"second_kthread");
printk(KERN_INFO "%ld \n",(long)kthread_pid2);
if(kthread_pid2 < 0 ){
printk(KERN_ALERT "Kernel thread [2] creation failed\n");
return -1;
}
setup_timer( &my_timer, my_timer_callback, 0 );
ret = mod_timer( &my_timer, jiffies + msecs_to_jiffies(2000) );
if (ret) {
printk("Error in mod_timer\n");
return -EINVAL;
}
return 0;
}
static void __exit signalexample_module_exit(void)
{
del_timer(&my_timer);
}
module_init(signalexample_module_init);
module_exit(signalexample_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Demonstrates use of kthread");
You need a call to schedule() in both of your thread functions:
/* In kernel thread function... */
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&wqueue, &wait);
schedule(); /* Add this call here */
spin_lock_irqsave(&my_si_lock, flags);
/* etc... */
Calling set_current_state(TASK_INTERRUPTIBLE) sets the state in the current process' task structure, which allows the scheduler to move the process off of the run queue once it sleeps. But then you have to tell the scheduler, "Okay, I've set a new state. Reschedule me now." You're missing this second step, so the changed flag won't take effect until the next time the scheduler decides to suspend your thread, and there's no way to know how soon that will happen, or which line of your code it's executing when it happens (except in the locked code - that shouldn't be interrupted).
I'm not really sure why it's causing your whole system to lock up, because your system's state is pretty unpredictable. Since the kernel threads weren't waiting for the timer to expire before grabbing locks and looping, I have no idea when you could expect the scheduler to actually take action on the new task struct states, and a lot of things could be happening in the meantime. Your threads are repeatedly calling add_wait_queue(&wqueue, &wait); and remove_wait_queue(&wqueue, &wait);, so who knows what state the wait queue is in by the time your timer callback fires. In fact, since the kernel threads are spinning, this code has a race condition:
if (waitqueue_active(&wqueue)) {
wake_up_interruptible(&wqueue);
}
It's possible that you have active tasks on the waitqueue when the if statement is executed, only to have them emptied out by the time wake_up_interruptible(&wqueue); is called.
By the way, I'm assuming your current goal of incrementing a global variable is just an exercise to learn waitqueues and sleep states. If you ever want to actually implement a shared counter, look at atomic operations instead, and you'll be able to dump the spinlock.
If you decide to keep the spinlock, you should switch to using the DEFINE_SPINLOCK() macro instead.
Also, as I mentioned in my comment, you should change your two kthread_pid variables to be of task_struct * type. You also need a call to kthread_stop(kthread_pid); in your __exit routine for each of the threads you start. kthread_should_stop() will never return true if you don't ever tell them to stop.
Related
In my destructor I want to destroy a thread cleanly.
My goal is to wait for a thread to finish executing and THEN destroy the thread.
The only thing I found about querying the state of a pthread is pthread_attr_setdetachstate but this only tells you if your thread is:
PTHREAD_CREATE_DETACHED
PTHREAD_CREATE_JOINABLE
Both of those have nothing to do with whether the thread is still running or not.
How do you query a pthread to see if it is still running?
It sounds like you have two questions here:
How can I wait until my thread completes?
Answer: This is directly supported by pthreads -- make your thread-to-be-stopped JOINABLE (when it is first started), and use pthread_join() to block your current thread until the thread-to-be-stopped is no longer running.
How can I tell if my thread is still running?
Answer: You can add a "thread_complete" flag to do the trick:
Scenario: Thread A wants to know if Thread B is still alive.
When Thread B is created, it is given a pointer to the "thread_complete" flag address. The "thread_complete" flag should be initialized to NOT_COMPLETED before the thread is created. Thread B's entry point function should immediately call pthread_cleanup_push() to push a "cleanup handler" which sets the "thread_complete" flag to COMPLETED.
See details about cleanup handlers here: pthread cleanup handlers
You'll want to include a corresponding pthread_cleanup_pop(1) call to ensure that the cleanup handler gets called no matter what (i.e. if the thread exits normally OR due to cancellation, etc.).
Then, Thread A can simply check the "thread_complete" flag to see if Thread B has exited yet.
NOTE: Your "thread_complete" flag should be declared "volatile" and should be an atomic type -- the GNU compilers provide the sig_atomic_t for this purpose. This allows the two threads consistent access the same data without the need for synchronization constructs (mutexes/semaphores).
pthread_kill(tid, 0);
No signal is sent, but error checking is still performed so you can use that to check
existence of tid.
CAUTION: This answer is incorrect. The standard specifically prohibits passing the ID of a thread whose lifetime has ended. That ID might now specify a different thread or, worse, it might refer to memory that has been freed, causing a crash.
I think all you really need is to call pthread_join(). That call won't return until the thread has exited.
If you only want to poll to see whether the thread is still running or not (and note that is usually not what you should be wanting to do!), you could have the thread set a volatile boolean to false just before it exits... then your main-thread could read the boolean and if it's still true, you know the thread is still running. (if it's false, on the other hand, you know the thread is at least almost gone; it may still be running cleanup code that occurs after it sets the boolean to false, though, so even in this case you should still call pthread_join before trying to free any resources the thread might have access to)
There is not fully portable solution, look if your platform supports pthread_tryjoin_np or pthread_timedjoin_np. So you just check if thread can be joined (of course created with PTHREAD_CREATE_JOINABLE).
Let me note on the "winning" answer, which has a huge hidden flaw, and in some contexts it can lead to crashes. Unless you use pthread_join, it will coming up again and again. Assume you are having a process and a shared library. Call the library lib.so.
You dlopen it, you start a thread in it. Assume you don't want it join to it, so you set it detachable.
Process and shared lib's logic doing its work, etc...
You want to load out lib.so, because you don't need it any more.
You call a shutdown on the thread and you say, that you want to read a flag afterwards from your lib.so's thread, that it have finished.
You continue on another thread with dlclose, because you see, that you have saw, that the flag is now showing the thread as "finished"
dlclose will load out all stack and code related memory.
Whops, but dlclose does not stop threads. And you know, even when you are in the last line of the cleanup handler to set the "thread is finished" volatile atomic flag variable, you still have to return from a lot of methods on the stack, giving back values, etc. If a huge thread priority was given to #5+#6's thread, you will receive dlclose before you could REALLY stop on the thread. You will have some nice crashes sometimes.
Let me point out, that this is not a hipothetical problem, I had the same issue on our project.
I believe I've come up with a solution that at least works for Linux. Whenever I create a thread I have it save it's LWP (Light Weight Process ID) and assign it a unique name, eg.
int lwp = syscall(SYS_gettid);
prctl(PR_SET_NAME, (long)"unique name", 0, 0, 0);
Then, to check if the thread exists later I open /proc/pid/task/lwp/comm and read it. If the file exists and it's contents match the unique name I assigned, the thread exists. Note that this does NOT pass a possibly defunct/reused TID to any library function, so no crashes.
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <sys/file.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <syscall.h>
pthread_t subthread_tid;
int subthread_lwp;
#define UNIQUE_NAME "unique name"
bool thread_exists (pthread_t thread_id)
{
char path[100];
char thread_name[16];
FILE *fp;
bool thread_exists = false;
// If the /proc/<pid>/task/<lwp>/comm file exists and it's contents match the "unique name" the
// thread exists, and it's the original thread (TID has NOT been reused).
sprintf(path, "/proc/%d/task/%d/comm", getpid(), subthread_lwp);
fp = fopen(path, "r");
if( fp != NULL ) {
fgets(thread_name, 16, fp);
fclose(fp);
// Need to trim off the newline
thread_name[strlen(thread_name)-1] = '\0';
if( strcmp(UNIQUE_NAME, thread_name) == 0 ) {
thread_exists = true;
}
}
if( thread_exists ) {
printf("thread exists\n");
} else {
printf("thread does NOT exist\n");
}
return thread_exists;
}
void *subthread (void *unused)
{
subthread_lwp = syscall(SYS_gettid);
prctl(PR_SET_NAME, (long)UNIQUE_NAME, 0, 0, 0);
sleep(10000);
return NULL;
}
int main (int argc, char *argv[], char *envp[])
{
int error_number;
pthread_create(&subthread_tid, NULL, subthread, NULL);
printf("pthread_create()\n");
sleep(1);
thread_exists(subthread_tid);
pthread_cancel(subthread_tid);
printf("pthread_cancel()\n");
sleep(1);
thread_exists(subthread_tid);
error_number = pthread_join(subthread_tid, NULL);
if( error_number == 0 ) {
printf("pthread_join() successful\n");
} else {
printf("pthread_join() failed, %d\n", error_number);
}
thread_exists(subthread_tid);
exit(0);
}
#include <string.h>
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <unistd.h>
void* thread1 (void* arg);
void* thread2 (void* arg);
int main()
{
pthread_t thr_id;
pthread_create(&thr_id, NULL, thread1, NULL);
sleep(10);
}
void* thread1 (void* arg)
{
pthread_t thr_id = 0;
pthread_create(&thr_id, NULL, thread2, NULL);
sleep(5);
int ret = 0;
if( (ret = pthread_kill(thr_id, 0)) == 0)
{
printf("still running\n");
pthread_join(thr_id, NULL);
}
else
{
printf("RIP Thread = %d\n",ret);
}
}
void* thread2 (void* arg)
{
// sleep(5);
printf("I am done\n");
}
I have this code which implements a simple counter and writes it to a log file via printk in function void thread_code (void *arg), and then implements it in a real-time thread via __init rtcode_init(void). This question is an examination question.
I'm unsure why the code would cause linux to crash. Is it because of the static function, as that could cause a race condition if the memory address accessing the function was accessed by multiple threads? Would be great to have someone point out what is wrong with the code.
#define ARG 0
#define STACK_SIZE 1024
#define PRIORITY RT_SCHED_HIGHEST_PRIORITY
#define USE_FPU 1
#define NOW rt_get_time()
#define PERIOD nano2count(1000000)
/* Store data needed for the thread */
RT_TASK thread_data;
void thread_code(void *arg)
{
int counter = 0;
while (1) {
counter += 1;
printk("Counter = %d\n",counter);
}
return 0;
}
static int __init rtcode_init(void)
{
rt_set_periodic_mode();
start_rt_timer(PERIOD);
rt_task_init(&thread_data, thread_code, ARG, STACK_SIZE, PRIORITY, USE_FPU, NULL);
rt_task_make_periodic(&thread_data, NOW, PERIOD);
return 0;
}
This code creates a task with realtime priority which runs constantly. If your system has only one CPU core, this will prevent anything else from running, making the system unusable. I don't think it'd "crash", precisely, but it'd stop responding to input.
The main function is based on libevent, but there is a long run task in the function. So start N treads to run the tasks. Is is this idea OK? And how to use libevent and pthread together in C?
Bumping an old question, may have already been solved. But posting the answer just in case someone else needs it.
Yes, it is okay to do threading in this case. I recently used libevent in pthreads, and it seems to be working just fine. Here's the code :
#include <stdint.h>
#include <pthread.h>
#include <event.h>
void * thread_func (void *);
int main(void)
{
int32_t tid = 0, ret = -1;
struct event_base *evbase;
struct event *timer;
int32_t *t_ret = &ret;
struct timeval tv;
// 1. initialize libevent for pthreads
evthread_use_pthreads();
ret = pthread_create(&tid, NULL, thread_func, NULL);
// check ret for error
// 2. allocate event base
evbase = event_base_new();
// 3. allocate event object
timer = event_new(evbase, -1, EV_PERSIST, callback_func, NULL);
// 4. add event
tv.tv_sec = 0;
tv.tv_usec = 1000;
evtimer_add(timer, &tv);
// 5. start the event loop
event_base_dispatch(evbase); // event loop
// join pthread...
// 6. free resources
event_free(timer);
event_base_free(evbase);
return 0;
}
void * thread_func(void *arg)
{
struct event *ev;
struct event_base *base;
base = event_base_new();
ev = event_new(base, -1, EV_PERSIST, thread_callback, NULL);
event_add(ev, NULL); // wait forever
event_base_dispatch(base); // start event loop
event_free(ev);
event_base_free(base);
pthread_exit(0);
}
As you can see, in my case, the event for the main thread is timer. The base logic followed is as below :
call evthread_use_pthreads() to initialize libevent for pthreads on Linux (my case). For windows evthread_use_window_threads(). Check out the documentation given in event.h itself.
Allocate an event_base structure on global heap as instructed in documentation. Make sure to check return value for errors.
Same as above, but allocate event structure itself. In my case, I am not waiting on any file descriptor, so -1 is passed as argument. Also, I want my event to persist, hence EV_PERSIST . The code for callback functions is omitted.
Schedule the event for execution
Start the event loop
free the resources when done.
Libevent version used in my case is libevent2 5.1.9 , and you will also need libevent_pthreads.so library for linking.
cheers.
That would work.
In the I/O callback function delegates time consuming job to another thread of a thread pool. The exact mechanics depend on the interface of the worker thread or the thread pool.
To communicate the result back from the worker thread to the I/O thread use a pipe. The worker thread writes the pointer to the result object to the pipe and the I/O thread
wakes up and read the pointer from the pipe.
There is a multithreaded libevent example in this blog post:
http://www.roncemer.com/multi-threaded-libevent-server-example
His solution is, to quote:
The solution is to create one libevent event queue (AKA event_base) per active connection, each with its own event pump thread. This project does exactly that, giving you everything you need to write high-performance, multi-threaded, libevent-based socket servers.
NOTE This is for libev not libevent but the idea may apply.
Here I present an example for the community. Please comment and let me know if there are any noticable bugs. This example could include a signal handler for thread termination and graceful exit in the future.
//This program is demo for using pthreads with libev.
//Try using Timeout values as large as 1.0 and as small as 0.000001
//and notice the difference in the output
//(c) 2009 debuguo
//(c) 2013 enthusiasticgeek for stack overflow
//Free to distribute and improve the code. Leave credits intact
//compile using: gcc -g test.c -o test -lpthread -lev
#include <ev.h>
#include <stdio.h> // for puts
#include <stdlib.h>
#include <pthread.h>
pthread_mutex_t lock;
double timeout = 0.00001;
ev_timer timeout_watcher;
int timeout_count = 0;
ev_async async_watcher;
int async_count = 0;
struct ev_loop* loop2;
void* loop2thread(void* args)
{
// now wait for events to arrive on the inner loop
ev_loop(loop2, 0);
return NULL;
}
static void async_cb (EV_P_ ev_async *w, int revents)
{
//puts ("async ready");
pthread_mutex_lock(&lock); //Don't forget locking
++async_count;
printf("async = %d, timeout = %d \n", async_count, timeout_count);
pthread_mutex_unlock(&lock); //Don't forget unlocking
}
static void timeout_cb (EV_P_ ev_timer *w, int revents) // Timer callback function
{
//puts ("timeout");
if(ev_async_pending(&async_watcher)==false){ //the event has not yet been processed (or even noted) by the event loop? (i.e. Is it serviced? If yes then proceed to)
ev_async_send(loop2, &async_watcher); //Sends/signals/activates the given ev_async watcher, that is, feeds an EV_ASYNC event on the watcher into the event loop.
}
pthread_mutex_lock(&lock); //Don't forget locking
++timeout_count;
pthread_mutex_unlock(&lock); //Don't forget unlocking
w->repeat = timeout;
ev_timer_again(loop, &timeout_watcher); //Start the timer again.
}
int main (int argc, char** argv)
{
if (argc < 2) {
puts("Timeout value missing.\n./demo <timeout>");
return -1;
}
timeout = atof(argv[1]);
struct ev_loop *loop = EV_DEFAULT; //or ev_default_loop (0);
//Initialize pthread
pthread_mutex_init(&lock, NULL);
pthread_t thread;
// This loop sits in the pthread
loop2 = ev_loop_new(0);
//This block is specifically used pre-empting thread (i.e. temporary interruption and suspension of a task, without asking for its cooperation, with the intention to resume that task later.)
//This takes into account thread safety
ev_async_init(&async_watcher, async_cb);
ev_async_start(loop2, &async_watcher);
pthread_create(&thread, NULL, loop2thread, NULL);
ev_timer_init (&timeout_watcher, timeout_cb, timeout, 0.); // Non repeating timer. The timer starts repeating in the timeout callback function
ev_timer_start (loop, &timeout_watcher);
// now wait for events to arrive on the main loop
ev_loop(loop, 0);
//Wait on threads for execution
pthread_join(thread, NULL);
pthread_mutex_destroy(&lock);
return 0;
}
Can someone help me to complete my code with a function that can check the result of a timer "check_timer" and another one that reset this timer if it had expired "reset_timer"?
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <sys/time.h>
#define GLOBAL_TIMER 8 * 60 * 60
typedef struct protocol_type {
int protocol_number;
char *protocol_name;
pthread_t thread_timer_id;
} protocol_type_t;
pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
/* call back function - inform the user the time has expired */
void timeout_call_back(pthread_t thread_id)
{
printf("Welcome thread %ld, your time is up ===\n", pthread_self());
// doing some other stuff
}
/* Go to sleep for a period of seconds */
static void* start_timer(void *args)
{
/* function pointer */
void (*finish_function)(pthread_t);
int seconds = *((int*) args);
finish_function = timeout_call_back;
struct timeval now;
struct timespec timeout;
pthread_mutex_lock(&mut);
printf("thread ID : %ld, are waiting for %d seconds to to expire\n", pthread_self(), seconds);
gettimeofday(&now, NULL);
timeout.tv_sec = now.tv_sec + seconds;
timeout.tv_nsec = now.tv_usec * 1000;
pthread_cond_timedwait(&cond, &mut, &timeout);
(*finish_function)(pthread_self());
pthread_mutex_unlock(&mut);
pthread_exit(NULL);
}
// This function is in MT environnement and is running inside a daemon
int received_buffer_parser(char *received_buffer) {
pthread_mutex_t mut_main = PTHREAD_MUTEX_INITIALIZER;
protocol_type_t *my_protocol;
// Identify protocol type
my_protocol = protocol_identifier(received_buffer);
// Check if i received it in the last 8 hours in safe
pthread_mutex_lock(&mut_main);
if (check_timer(my_protocol->thread_id) has expired) { // I want to write this function
// I want to reset thread timer
launch_new_timer(my_protocol->thread_id)
}
else {
// doing some stuff
// dump data to the disk
}
pthread_mutex_unlock(&mut_main);
return 0;
}
int launch_new_timer(pthread_t thread_id)
{
int rc;
int seconds = GLOBAL_TIMER;
rc = pthread_create(&thread_id, NULL, start_timer, (void *) &seconds);
if(rc)
printf("Failed to create thread1\n");
pthread_join(thread_id, NULL);
return 0;
}
Clarification
I clarify here the real context of my code:
I receive from the network some types of different protocols packets(ICMP, SSH, RIP, OSPF, BGP...), and i want to:
identify every type of packets, let say with : identify_protocol(char *received_buffer), I got this function, it's ok.
Check if i receive this type of protocols in the last 8 hours (THE TIMER OF EACH PROTOCOL TYPE EXPIRE AFTER 8 HOURS), two choices:
a. if so, I dump the result data into a specific file on the disk.
b. no, I didn't receive this type in the last 8 HOURS i create a new thread (in my code i simplify, with thread1, thread2 and thread3, this threads are 3 threads used to be a timers for three protocols types) - i start a new timer with : start_timer(void *args) function do this job.
My main question is how to be able to check the result of my timers in a safe manner and then decide i reset the timer or not.
I design the finish_function at the beginning to help me to check when the timer has expired.
Feel free to give me a better design for best performances for my program.
My system is Linux.
To check for timers that merely expire, you don't need to use threads and synchronization at all. Simply keep global variables indicating the start time of the timer. So
when the timer starts, set a global variable (one per protocol) to gettimeofday()
when you want to check whether the timer has expired for a protocol, see whether gettimeofday()-starttime(protocol) is <8h
If you want to be notified on timer expiry, I recommend to use alarm(2). It only has second resolution, but that should be good enough for 8h timeouts. Whenever a timer is set, cancelled, or reset, compute the minimum timeout of any of the timers, then call alarm with that timeout. In the signal handler, perform the processing that you want to do on reception of timeout. Alternatively, do nothing in the signal handler, and just trust that any pending system call will be interrupted, and check all timers for expiry on EINTR.
Edit: alarm works like this
#include <unistd.h>
#include <signal.h>
void timeout(int ignored)
{
printf("timed out\n");
}
void main()
{
signal(SIGALRM, timeout);
alarm(10);
pause();
}
In my destructor I want to destroy a thread cleanly.
My goal is to wait for a thread to finish executing and THEN destroy the thread.
The only thing I found about querying the state of a pthread is pthread_attr_setdetachstate but this only tells you if your thread is:
PTHREAD_CREATE_DETACHED
PTHREAD_CREATE_JOINABLE
Both of those have nothing to do with whether the thread is still running or not.
How do you query a pthread to see if it is still running?
It sounds like you have two questions here:
How can I wait until my thread completes?
Answer: This is directly supported by pthreads -- make your thread-to-be-stopped JOINABLE (when it is first started), and use pthread_join() to block your current thread until the thread-to-be-stopped is no longer running.
How can I tell if my thread is still running?
Answer: You can add a "thread_complete" flag to do the trick:
Scenario: Thread A wants to know if Thread B is still alive.
When Thread B is created, it is given a pointer to the "thread_complete" flag address. The "thread_complete" flag should be initialized to NOT_COMPLETED before the thread is created. Thread B's entry point function should immediately call pthread_cleanup_push() to push a "cleanup handler" which sets the "thread_complete" flag to COMPLETED.
See details about cleanup handlers here: pthread cleanup handlers
You'll want to include a corresponding pthread_cleanup_pop(1) call to ensure that the cleanup handler gets called no matter what (i.e. if the thread exits normally OR due to cancellation, etc.).
Then, Thread A can simply check the "thread_complete" flag to see if Thread B has exited yet.
NOTE: Your "thread_complete" flag should be declared "volatile" and should be an atomic type -- the GNU compilers provide the sig_atomic_t for this purpose. This allows the two threads consistent access the same data without the need for synchronization constructs (mutexes/semaphores).
pthread_kill(tid, 0);
No signal is sent, but error checking is still performed so you can use that to check
existence of tid.
CAUTION: This answer is incorrect. The standard specifically prohibits passing the ID of a thread whose lifetime has ended. That ID might now specify a different thread or, worse, it might refer to memory that has been freed, causing a crash.
I think all you really need is to call pthread_join(). That call won't return until the thread has exited.
If you only want to poll to see whether the thread is still running or not (and note that is usually not what you should be wanting to do!), you could have the thread set a volatile boolean to false just before it exits... then your main-thread could read the boolean and if it's still true, you know the thread is still running. (if it's false, on the other hand, you know the thread is at least almost gone; it may still be running cleanup code that occurs after it sets the boolean to false, though, so even in this case you should still call pthread_join before trying to free any resources the thread might have access to)
There is not fully portable solution, look if your platform supports pthread_tryjoin_np or pthread_timedjoin_np. So you just check if thread can be joined (of course created with PTHREAD_CREATE_JOINABLE).
Let me note on the "winning" answer, which has a huge hidden flaw, and in some contexts it can lead to crashes. Unless you use pthread_join, it will coming up again and again. Assume you are having a process and a shared library. Call the library lib.so.
You dlopen it, you start a thread in it. Assume you don't want it join to it, so you set it detachable.
Process and shared lib's logic doing its work, etc...
You want to load out lib.so, because you don't need it any more.
You call a shutdown on the thread and you say, that you want to read a flag afterwards from your lib.so's thread, that it have finished.
You continue on another thread with dlclose, because you see, that you have saw, that the flag is now showing the thread as "finished"
dlclose will load out all stack and code related memory.
Whops, but dlclose does not stop threads. And you know, even when you are in the last line of the cleanup handler to set the "thread is finished" volatile atomic flag variable, you still have to return from a lot of methods on the stack, giving back values, etc. If a huge thread priority was given to #5+#6's thread, you will receive dlclose before you could REALLY stop on the thread. You will have some nice crashes sometimes.
Let me point out, that this is not a hipothetical problem, I had the same issue on our project.
I believe I've come up with a solution that at least works for Linux. Whenever I create a thread I have it save it's LWP (Light Weight Process ID) and assign it a unique name, eg.
int lwp = syscall(SYS_gettid);
prctl(PR_SET_NAME, (long)"unique name", 0, 0, 0);
Then, to check if the thread exists later I open /proc/pid/task/lwp/comm and read it. If the file exists and it's contents match the unique name I assigned, the thread exists. Note that this does NOT pass a possibly defunct/reused TID to any library function, so no crashes.
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <sys/file.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <syscall.h>
pthread_t subthread_tid;
int subthread_lwp;
#define UNIQUE_NAME "unique name"
bool thread_exists (pthread_t thread_id)
{
char path[100];
char thread_name[16];
FILE *fp;
bool thread_exists = false;
// If the /proc/<pid>/task/<lwp>/comm file exists and it's contents match the "unique name" the
// thread exists, and it's the original thread (TID has NOT been reused).
sprintf(path, "/proc/%d/task/%d/comm", getpid(), subthread_lwp);
fp = fopen(path, "r");
if( fp != NULL ) {
fgets(thread_name, 16, fp);
fclose(fp);
// Need to trim off the newline
thread_name[strlen(thread_name)-1] = '\0';
if( strcmp(UNIQUE_NAME, thread_name) == 0 ) {
thread_exists = true;
}
}
if( thread_exists ) {
printf("thread exists\n");
} else {
printf("thread does NOT exist\n");
}
return thread_exists;
}
void *subthread (void *unused)
{
subthread_lwp = syscall(SYS_gettid);
prctl(PR_SET_NAME, (long)UNIQUE_NAME, 0, 0, 0);
sleep(10000);
return NULL;
}
int main (int argc, char *argv[], char *envp[])
{
int error_number;
pthread_create(&subthread_tid, NULL, subthread, NULL);
printf("pthread_create()\n");
sleep(1);
thread_exists(subthread_tid);
pthread_cancel(subthread_tid);
printf("pthread_cancel()\n");
sleep(1);
thread_exists(subthread_tid);
error_number = pthread_join(subthread_tid, NULL);
if( error_number == 0 ) {
printf("pthread_join() successful\n");
} else {
printf("pthread_join() failed, %d\n", error_number);
}
thread_exists(subthread_tid);
exit(0);
}
#include <string.h>
#include <stdio.h>
#include <pthread.h>
#include <signal.h>
#include <unistd.h>
void* thread1 (void* arg);
void* thread2 (void* arg);
int main()
{
pthread_t thr_id;
pthread_create(&thr_id, NULL, thread1, NULL);
sleep(10);
}
void* thread1 (void* arg)
{
pthread_t thr_id = 0;
pthread_create(&thr_id, NULL, thread2, NULL);
sleep(5);
int ret = 0;
if( (ret = pthread_kill(thr_id, 0)) == 0)
{
printf("still running\n");
pthread_join(thr_id, NULL);
}
else
{
printf("RIP Thread = %d\n",ret);
}
}
void* thread2 (void* arg)
{
// sleep(5);
printf("I am done\n");
}