I have a multithreaded process that has to control the execution of another process.
To do so, from one of the threads I use Ptrace.
This is how the tracee is created and launched.
// Fork once: the child asks to be traced and then becomes the target
// program; the parent keeps the child's pid in childPID and carries on.
switch( childPID=fork() ){
case -1:
// fork() failed, so there is no child to trace.
perror("fork()");
return -1;
case 0 :
// Child side: request tracing by the parent, then replace the process image.
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
execve(execPath,NULL,NULL);
// Only reached when execve() failed.
return -1;
default:
// Parent side: nothing more to do here.
break;
}
This is how the process is run
// Repeatedly resume the tracee and block until waitpid() reports its next
// state change.
while (1) {
ptrace(PTRACE_CONT, childPID, 0, 0);
waitpid( childPID, &status, 0);
// inspect status and break in some cases
...
...
}
I have a similar non-multithreaded application that works perfectly: it loads the executable and inspects the stack and memory without problems. But when I try this configuration in the multithreaded one, the process I create does not run at all.
My question is. How can I trace a process from a thread ? Do I have to change the way I attach the process?
The code at the end of the post is one answer to the question.
You can have a thread that trace a process.
If anyone is interested, the problem I was experiencing was that, for some unintelligible reason, the tracer thread was not the one sending all the tracing commands. One thread was calling fork and therefore had the responsibility of tracing, while another one was sending
ptrace(PTRACE_CONT, childPID, 0, 0);
ptrace (PTRACE_GETREGS, childPID, 0, registers);
and the resulting error was: ptrace (PTRACE_GETREGS,..) Couldn't get registers: No such process
#include <pthread.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/reg.h>
#include <sys/user.h>
#define NUM_THREADS 9
int childPID;
int fatherPID;
/*
 * Print every field of *registers to stdout, one per line, as
 * "\tReg <name> 0x<hex>".
 *
 * The fields used are ebx, ecx, edx, esi, edi, ebp, eax, the x?s segment
 * registers, orig_eax, eip, eflags and esp.
 */
#define PRINT_REG(field) printf("\tReg " #field " 0x%lx\n", registers->field)
void print_registers(struct user_regs_struct *registers){
    PRINT_REG(ebx);
    PRINT_REG(ecx);
    PRINT_REG(edx);
    PRINT_REG(esi);
    PRINT_REG(edi);
    PRINT_REG(ebp);
    PRINT_REG(eax);
    PRINT_REG(xds);
    PRINT_REG(xes);
    PRINT_REG(xfs);
    PRINT_REG(xgs);
    PRINT_REG(orig_eax);
    PRINT_REG(eip);
    PRINT_REG(xcs);
    PRINT_REG(eflags);
    PRINT_REG(esp);
    PRINT_REG(xss);
}
#undef PRINT_REG
/*
 * Start execPath as a traced child of the calling thread.
 *
 * The child requests tracing with PTRACE_TRACEME and then exec()s the
 * program; the parent waits for the resulting stop and enables
 * PTRACE_O_TRACEEXIT on the tracee.  The child's pid is stored in the
 * global childPID and the caller's pid in fatherPID.
 *
 * The thread that calls load() becomes the tracer, so every later ptrace()
 * request for childPID must be issued from that same thread.
 *
 * Returns 0 when the tracee is stopped and ready, -1 on any failure.
 */
int load(char * execPath){
    int status = 0;

    switch( childPID=fork() ){
    case -1:
        perror("fork()");
        return -1;
    case 0 : {
        /* Child.  It must never return into the caller's code: it is a copy
         * of the tracer process, so every failure path ends in _exit(). */
        char *const child_argv[] = { execPath, NULL };
        char *const child_envp[] = { NULL };

        if( access(execPath, X_OK)==-1){
            printf("\tAccess denied to %s\n",execPath);
            fflush(stdout);
            _exit(EXIT_FAILURE);
        }
        printf("\tChild Process pid :%d %d\n",childPID,getpid());
        fflush(stdout);              /* exec would discard buffered output */
        if(ptrace(PTRACE_TRACEME, 0, NULL, NULL)<0){
            perror("ptrace(PTRACE_TRACEME)");
            _exit(EXIT_FAILURE);
        }
        execve(execPath, child_argv, child_envp);
        perror("execve()");
        _exit(EXIT_FAILURE);
    }
    default:
        break;
    }

    /* Parent: wait for this particular child, not just any child. */
    if (waitpid(childPID, &status, 0) < 0) {
        perror("waitpid()");
        return -1;
    }
    fatherPID=getpid();
    printf("\tParent Process pid :%d %d\n",fatherPID,childPID);

    /* If the child exited instead of stopping there is nothing to trace. */
    if (!WIFSTOPPED(status)) {
        fprintf(stderr, "\tChild %d did not stop at exec\n", childPID);
        return -1;
    }
    if (ptrace(PTRACE_SETOPTIONS, childPID, 0, PTRACE_O_TRACEEXIT)){
        perror("stopper: ptrace(PTRACE_SETOPTIONS, ...)");
        return -1;
    }
    return 0;
}
/*
 * Fetch the tracee's registers with PTRACE_GETREGS and print them.
 *
 * Must run on the thread that traces childPID.  When the request fails an
 * error is reported and nothing is printed, so a zeroed register set is
 * never passed off as real data.
 */
void registers(){
    struct user_regs_struct regs = {0};

    printf("\t##Command get_registers#\n");
    if (ptrace(PTRACE_GETREGS, childPID, 0, &regs) < 0) {
        perror("ptrace (PTRACE_GETREGS,..) Couldn't get registers");
        return;
    }
    print_registers(&regs);
}
/*
 * Resume the tracee until something of interest happens.
 *
 * The tracee is continued repeatedly; other stop signals are swallowed and
 * the loop goes on.  The function returns when
 *   - the tracee stops with SIGTRAP, SIGTERM, SIGINT, SIGHUP or SIGSEGV
 *     (that signal number is returned),
 *   - the tracee reaches its PTRACE_EVENT_EXIT stop (SIGHUP is returned),
 *   - the tracee exits normally (0 is returned),
 *   - the tracee is killed by a signal (that signal number is returned),
 *   - ptrace() or waitpid() fails (-1 is returned).
 *
 * Must run on the thread that traces childPID.
 */
int continuE(){
    int status = 0;
    int signo = 0;
    unsigned long event_msg = 0;

    while (1) {
        if (ptrace(PTRACE_CONT, childPID, 0, 0) < 0) {
            perror("ptrace(PTRACE_CONT)");
            return -1;
        }
        if (waitpid( childPID, &status, 0) < 0) {
            perror("waitpid()");
            return -1;
        }

        /* The tracee is gone: continuing it again can never succeed. */
        if (WIFEXITED(status)) {
            printf("Child exited by %d\n",WEXITSTATUS(status));
            return 0;
        }
        if (WIFSIGNALED(status)) {
            printf(" child process terminated by a signal %d \n",WTERMSIG(status) );
            return WTERMSIG(status);
        }

        if (WIFSTOPPED(status)) {
            signo = WSTOPSIG(status);
        }

        /* SIGTRAP stop carrying the exec event */
        if (status>>8 == (SIGTRAP | (PTRACE_EVENT_EXEC<<8))){
            printf("\t###Stopped the tracee at EXEC, with status %d###\n",WEXITSTATUS(status));
            ptrace(PTRACE_GETEVENTMSG, childPID,0,&event_msg);
            printf("\t###PTRACE_GETEVENTMSG result %lu ,%d ###\n",event_msg,(int)WEXITSTATUS((int)event_msg));
        }

        /* SIGTRAP stop carrying the exit event (enabled by PTRACE_O_TRACEEXIT) */
        if (status>>8 == (SIGTRAP | (PTRACE_EVENT_EXIT<<8))){
            printf("\t###Stopped the tracee at EXIT###\n");
            signo= SIGHUP;
        }

        /* signals that hand control back to the caller */
        if ((signo == SIGTRAP) || (signo == SIGTERM) ||(signo ==SIGINT) || (signo == SIGHUP)
            || ( signo == SIGSEGV) ){
            break;
        }
    }
    return signo;
}
/*
 * Tracer thread entry point: greets, launches the traced program, dumps its
 * registers, lets it run, dumps the registers again and ends the thread.
 * threadid carries the thread's number, passed by value inside the pointer.
 */
void *work(void *threadid)
{
    const long my_number = (long)threadid;

    printf("Hello World! It's me, thread #%ld!\n", my_number);

    /* fork, trace and control all happen on this one thread */
    load("/home/rtems/plibeagleeye/Plib/Tests/bin/stanford.o");
    registers();
    continuE();
    registers();

    pthread_exit(NULL);
}
/*
 * Bystander thread entry point: prints a greeting with its number and ends.
 * threadid carries the thread's number, passed by value inside the pointer.
 */
void *work2(void *threadid)
{
    const long my_number = (long)threadid;

    printf("Hello World! It's me, thread #%ld!\n", my_number);
    pthread_exit(NULL);
}
/*
 * Start NUM_THREADS joinable threads: thread 0 runs work() (the tracer),
 * the others run work2().  Joins them all and reports each join.
 *
 * Exits with -1 if a thread cannot be created or joined, or if the id array
 * cannot be allocated.
 */
int main (int argc, char *argv[])
{
    pthread_t threads[NUM_THREADS];
    pthread_attr_t attr;
    int rc;
    long *taskids;
    void *status;
    long t;

    (void)argc;
    (void)argv;

    taskids = malloc(NUM_THREADS * sizeof *taskids);
    if (taskids == NULL) {
        perror("malloc()");
        exit(-1);
    }

    /* Initialize and set thread detached attribute */
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    for(t=0; t<NUM_THREADS; t++){
        taskids[t] = t;
        if (t > 0)
            printf("Creating thread %ld\n", t);
        /* only thread 0 forks and traces; every creation is checked */
        rc = pthread_create(&threads[t], &attr, (t == 0) ? work : work2,
                            (void *)taskids[t]);
        if (rc){
            printf("ERROR; return code from pthread_create() is %d\n", rc);
            exit(-1);
        }
    }
    pthread_attr_destroy(&attr);

    for(t=0; t<NUM_THREADS; t++){
        rc = pthread_join(threads[t], &status);
        if (rc) {
            printf("ERROR; return code from pthread_join() is %d\n", rc);
            exit(-1);
        }
        printf("Main: completed join with thread %ld having a status of %ld\n",t,(long)status);
    }
    printf("Ciaoz all threads finished their jobs\n");
    free(taskids);

    /* every thread has been joined, so a plain return ends the process */
    return 0;
}
The thing that really surprises me is that there is no indication of which thread is the tracer. In ptrace(PTRACE_TRACEME, 0, NULL, NULL), the 0 seems to work perfectly.
In a multi-threaded application in order to trace the program you need to use ptrace for each and particular thread the parent process spawns by using ptrace(PTRACE_foo, pid, ...) where pid is the thread id of the process. In order to trace the parent itself then use ptrace with pid = 0 in the parent code. ptrace is strictly to a particular thread only.
hope u found what u were looking after...
[EDIT]
I've made a mistake concerning the question interpretation.
Answering the comment below: according to the manual, PTRACE_TRACEME does not attach the tracee to the main thread but to the parent one.
PTRACE_TRACEME -- Indicate that this process is to be traced by its parent.
[THE OLD UNPROPER ANSWER]
Tracing is per thread: you need to attach to each thread individually. Your code only attaches to the main thread of the process started by execve.
from README-linux-ptrace:
Attachment and subsequent commands are per thread: in a multithreaded process, every thread can be individually attached to a (potentially different) tracer, or left not attached and thus not debugged. Therefore, "tracee" always means "(one) thread", never "a (possibly multithreaded) process".
You can do it catching the SIGTRAP signal (from ptrace man):
If the PTRACE_O_TRACEEXEC option is not in effect, all successful calls to execve(2) by the traced process will cause it to be sent a SIGTRAP signal, giving the parent a chance to gain control before the new program begins execution.
and using PTRACE_GETEVENTMSG to recover the pid:
Retrieve a message (as an unsigned long) about the ptrace event that just happened, placing it at the address data in the tracer. For PTRACE_EVENT_EXIT, this is the tracee's exit status. For PTRACE_EVENT_FORK, PTRACE_EVENT_VFORK, PTRACE_EVENT_VFORK_DONE, and PTRACE_EVENT_CLONE, this is the PID of the new process. (addr is ignored.)
and then using PTRACE_ATTACH for attach to the recovered new pid.
Related
First, start process B (see mt.cpp below) , it will create a thread with pthread_create(). The ppid, pid and tid of main thread and the new thread will be outputted for process A, then both of them start a for loop and raise SIGTRAP , which should be caught by waitpid() in process A.
Second, start process A (see attach.cpp below) with pid of process B. Process A will attach to process B by ptrace(PTRACE_ATTACH, ...), then wait signal event using waitpid() in while(true), call ptrace(PTRACE_CONT, ...) if get a SIGTRAP, or break the loop if get a SIGSTOP.
Now is the problem:
Process A can catch the SIGTRAP raised by main thread of process B and call ptrace(PTRACE_CONT, ...) successfully, and then process B will continue to execute as expected.
BUT!!!
When the new thread of process B raised SIGTRAP, process A failed to ptrace(PTRACE_CONT, ...) with a errmsg "No such process", because process B has core dumped with a errmsg "Trace/breakpoint trap (core dumped)".
In addition, WIFSTOPPED(status) turned to false and WIFSIGNALED(status) turned to true.
I know the default action of SIGTRAP is terminate the process, it seems that the SIGTRAP was transfered to process A after the termination action, NOT before, so process A had no chance to continue process B.
I have tried gdb instead of process A, both SIGTRAP can be caught and continued successfully. So there must be something wrong in the code of process A.
Here is attach.cpp executed as process A:
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/*
 * attach <pid>
 *
 * Attaches to the thread whose id is <pid>, resumes it, and then reacts to
 * each reported state change: a SIGTRAP is answered with PTRACE_CONT, a
 * SIGSTOP with PTRACE_DETACH (which ends the program).
 *
 * Only the single thread named by <pid> is traced; other threads of the same
 * process are not attached by this program.
 *
 * Returns 0 on a normal detach or once waitpid() fails, 1 on bad usage or a
 * failed attach.
 */
int main(int argc, char *argv[])
{
    pid_t pid = 0;
    int ret = 0;
    int status = 0;

    /* Without a valid argument the original went on to attach to pid 0. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <pid>\n", argv[0]);
        return 1;
    }
    pid = atoi(argv[1]);
    printf("pid=%d\n", pid);
    if (pid <= 0) {
        fprintf(stderr, "invalid pid '%s'\n", argv[1]);
        return 1;
    }

    errno = 0;
    ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
    printf("attach ret=%d\n", ret);
    if (ret < 0) {
        fprintf(stderr, "attach failed: %s\n", strerror(errno));
        return 1;
    }
    waitpid(pid, &status, 0);

    ret = ptrace(PTRACE_CONT, pid, NULL, NULL);
    printf("cont ret=%d\n", ret);

    while (true) {
        ret = waitpid(pid, &status, WUNTRACED);
        printf("\nwaitpid ret=%d.\n", ret);
        if (ret < 0) {
            /* Nothing left to wait for; status would be stale from here on. */
            printf("waitpid err=%s\n", strerror(errno));
            break;
        }

        int sig = 0;
        if (WIFSIGNALED(status)) {
            printf("WIFSIGNALED\n");
            sig = WTERMSIG(status);
        } else if (WIFSTOPPED(status)) {
            printf("WIFSTOPPED\n");
            sig = WSTOPSIG(status);
        } else {
            printf("other status %d\n", status);
        }

        if (SIGTRAP == sig) {
            errno = 0;   /* so the message below reflects this call only */
            ret = ptrace(PTRACE_CONT, pid, NULL, NULL);
            printf("SIGTRAP cont ret=%d err=%s\n", ret, strerror(errno));
        } else if (SIGSTOP == sig) {
            ret = ptrace(PTRACE_DETACH, pid, NULL, NULL);
            printf("SIGSTOP detach ret=%d\n", ret);
            break;
        } else {
            printf("other signal %d\n", sig);
        }
        sleep(2);
    }
    return 0;
}
Here is mt.cpp executed as process B:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <signal.h>
#include <sys/syscall.h>
#define gettid() syscall(SYS_gettid)
/*
 * Secondary thread body: prints its ids, idles for five 2-second rounds,
 * raises SIGTRAP, then idles for three more rounds.
 *
 * arg is unused.  Always returns NULL.
 */
void *func(void * arg)
{
    (void)arg;

    /* gettid() expands to syscall(), which yields a long; %d needs an int */
    printf("child ppid=%d pid=%d tid=%d\n", getppid(), getpid(), (int)gettid());

    int i = 0;
    for (; i < 5; i++) {
        printf("child loop i=%d\n", i);
        sleep(2);
    }

    printf("\nchild before SIGTRAP\n");
    raise(SIGTRAP);
    printf("child after SIGTRAP\n\n");

    for (; i < 8; i++) {
        printf("child loop i=%d\n", i);
        sleep(2);
    }
    return NULL;
}
/*
 * Main thread of the traced demo: prints its ids, starts func() in a second
 * thread, idles for three 2-second rounds, raises SIGTRAP, idles for seven
 * more rounds and finally joins the second thread.
 *
 * Returns 0, or 1 when the second thread cannot be created.
 */
int main(void)
{
    /* gettid() expands to syscall(), which yields a long; %d needs an int */
    printf("parent ppid=%d pid=%d tid=%d\n", getppid(), getpid(), (int)gettid());

    pthread_t tid;
    const int rc = pthread_create(&tid, NULL, func, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed with %d\n", rc);
        return 1;
    }

    int i = 0;
    for (; i < 3; i++) {
        printf("parent loop i=%d\n", i);
        sleep(2);
    }

    printf("\nparent before SIGTRAP\n");
    raise(SIGTRAP);
    printf("parent after SIGTRAP\n\n");

    for (; i < 10; i++) {
        printf("parent loop i=%d\n", i);
        sleep(2);
    }
    pthread_join(tid, NULL);
    return 0;
}
Here the result:
If you want to run the two programs yourself, make sure that process A (attach) is started as soon as possible after starting process B.
process B:
$ ./mt
parent ppid=12238 pid=30389 tid=30389
parent loop i=0
child ppid=12238 pid=30389 tid=30390
child loop i=0
parent loop i=1
child loop i=1
parent loop i=2
child loop i=2
parent before SIGTRAP
child loop i=3
parent after SIGTRAP
parent loop i=3
child loop i=4
parent loop i=4
child before SIGTRAP
Trace/breakpoint trap (core dumped)
process A:
$ ./attach 30389
pid=30389
attach ret=0
cont ret=0
waitpid ret=30389.
WIFSTOPPED
SIGTRAP cont ret=0 err=Success
waitpid ret=30389.
WIFSIGNALED
SIGTRAP cont ret=-1 err=No such process
^C
The Linux PTRACE_ATTACH request, despite its argument being named pid, will trace only that thread.
You can verify this by adding this function to your program and calling it in the two threads:
#define trprefix "TracerPid:"
/*
 * Look up the calling thread's tracer in /proc/self/task/<tid>/status.
 *
 * Returns the number that follows the "TracerPid:" label, or -1 when the
 * status file cannot be opened or has no such line.
 */
int tracerpid()
{
    char path[100], line[512];
    int tracer = -1;
    FILE *fp;

    sprintf(path, "/proc/self/task/%d/status", (int)gettid());
    fp = fopen(path, "r");
    if (fp == NULL)
        return tracer;

    while (fgets(line, sizeof line, fp) != NULL) {
        if (strncmp(line, trprefix, strlen(trprefix)) != 0)
            continue;
        tracer = atoi(line + strlen(trprefix));
    }
    fclose(fp);
    return tracer;
}
You'll see that the parent thread's Tracer PID is that of your "attach" process, while the child thread's Tracer PID is 0.
When the child thread raises SIGTRAP, there's no tracer for the thread, so the default action for SIGTRAP will be taken - the whole process will be killed. That's why your tracer says that waitpid returned WIFSIGNALED.
To fix this:
In the "mt" program, move the call to pthread_create to be after the first delay loop, which will give you enough time to attach to the process before the new thread is created.
Add this to the "attach" program after the ptrace(PTRACE_ATTACH, ...); waitpid(...);:
// Clear errno first so the message printed below reflects this call only.
errno = 0;
// Ask to also trace every thread the target creates with clone.
ret = ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACECLONE);
printf("setoptions ret=%d err=%s\n", ret, strerror(errno));
The PTRACE_O_TRACECLONE option will let your program trace every thread that the target creates with clone.
Convert all your waitpid(pid, ...) to waitpid(-1, ...) so that your program will wait for any thread.
I am writing a program to demonstrate signal handling in a secondary thread. In my program, the main thread spawns 10 threads and each thread calls sigwait to wait for a signal. But in my case, it is the main thread that handles the signal. The code is given below:
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <sys/types.h>
#include <errno.h>
volatile sig_atomic_t cont = 1;
volatile sig_atomic_t wsig = 0;
volatile sig_atomic_t wtid = 0;
// Returns the result of the gettid system call, i.e. the kernel's id for
// the calling thread, narrowed to int.
int GetCurrentThreadId()
{
    const long kernel_tid = syscall(__NR_gettid);
    return (int)kernel_tid;
}
// SIGSEGV handler: records the handling thread's id in wtid and the signal
// in wsig, then ends the process at once with SIGSEGV as its exit status.
void Segv1(int, siginfo_t *, void *)
{
    const int handler_tid = GetCurrentThreadId();

    wtid = handler_tid;
    wsig = SIGSEGV;
    _exit(SIGSEGV);
}
// SIGFPE handler: records the handling thread's id in wtid and the signal
// in wsig, then ends the process at once with SIGFPE as its exit status.
void Fpe1(int , siginfo_t *, void *)
{
    const int handler_tid = GetCurrentThreadId();

    wtid = handler_tid;
    wsig = SIGFPE;
    _exit(SIGFPE);
}
// SIGUSR1 handler: records the handling thread's id in wtid and the signal
// in wsig, then returns so the interrupted code can report them.
void User1(int, siginfo_t *, void *)
{
    const int handler_tid = GetCurrentThreadId();

    wtid = handler_tid;
    wsig = SIGUSR1;
}
// Worker thread: waits for SIGUSR1, SIGSEGV or SIGFPE and reports which
// thread handled the signal (wtid, set by the handler) next to its own id.
//
// The three signals are blocked while the thread runs and are unblocked only
// for the duration of sigsuspend(), so a handler can interrupt this thread
// only while it is waiting.  The mask passed to sigsuspend() is the set of
// signals to keep BLOCKED during the wait, so it must not contain the
// signals we want to be woken by.
//
// Returns NULL once the global flag `cont` is cleared; exits the process
// after reporting SIGSEGV or SIGFPE.
void* ThreadFunc (void*)
{
    sigset_t sigs;
    sigemptyset(&sigs);
    sigaddset(&sigs, SIGUSR1);
    sigaddset(&sigs, SIGSEGV);
    sigaddset(&sigs, SIGFPE);

    // Block the signals and remember the mask in force before that...
    sigset_t waitmask;
    pthread_sigmask(SIG_BLOCK, &sigs, &waitmask);
    // ...then make sure the wait mask lets exactly those signals through.
    sigdelset(&waitmask, SIGUSR1);
    sigdelset(&waitmask, SIGSEGV);
    sigdelset(&waitmask, SIGFPE);

    while(cont) {
        // Returns only after a signal handler has run in this thread.
        sigsuspend(&waitmask);
        switch(wsig) {
        case SIGSEGV:
            printf("Segmenation fault in thread: %d Current thread id: %d\n", wtid, GetCurrentThreadId());
            exit(1);
            break;
        case SIGFPE:
            printf("Floating point exception in thread: %d Current thread id: %d\n", wtid, GetCurrentThreadId());
            exit(1);
            break;
        case SIGUSR1:
            printf("User 1 signal in thread: %d Current thread id: %d\n", wtid, GetCurrentThreadId());
            break;
        default:
            printf("Unhandled signal: %d in thread: %d Current thread id: %d\n", wsig, wtid, GetCurrentThreadId());
            break;
        }
    }
    printf("Thread: %d ends\n", GetCurrentThreadId());
    return NULL;
}
// Signal-handling demo driver.
//
// Installs handlers for SIGUSR1, SIGSEGV and SIGFPE, starts ten ThreadFunc
// threads, then forks.  The child sends SIGUSR1 to the parent once a second,
// ten times.  The parent sleeps for 15 seconds in total, reporting the last
// recorded signal each time its sleep is interrupted, and finally divides by
// zero on purpose to trigger SIGFPE.
//
// This thread does not block any of the signals, so a SIGUSR1 directed at
// the process may be delivered here.
//
// Returns 1 when a handler, a thread or the child cannot be set up;
// otherwise 0 (the child always returns 0).
int main()
{
    printf("My PID: %d\n", getpid());
    printf("SIGSEGV: %d\nSIGFPE: %d\nSIGUSR1: %d\n", SIGSEGV, SIGFPE, SIGUSR1);

    struct sigaction act;
    memset(&act, 0, sizeof act);
    act.sa_flags = SA_SIGINFO;

    //Set Handler for SIGUSR1 signal.
    act.sa_sigaction = User1;
    if(sigaction(SIGUSR1, &act, NULL)<0) {
        fprintf(stderr, "sigaction failed\n");
        return 1;
    }
    //Set handler for SIGSEGV signal.
    act.sa_sigaction = Segv1;
    if(sigaction(SIGSEGV, &act, NULL)<0) {
        fprintf(stderr, "sigaction failed\n");
        return 1;
    }
    //Set handler for SIGFPE (floating point exception) signal.
    act.sa_sigaction = Fpe1;
    if(sigaction(SIGFPE, &act, NULL)<0) {
        fprintf(stderr, "sigaction failed\n");
        return 1;
    }

    const int numthreads = 10;
    pthread_t tid[numthreads];
    for(int i=0;i<numthreads;++i) {
        const int rc = pthread_create(&tid[i], NULL, ThreadFunc, NULL);
        if(rc != 0) {
            fprintf(stderr, "pthread_create failed with %d\n", rc);
            return 1;
        }
    }
    // Give the threads time to reach their wait before signals start.
    sleep(numthreads/2);

    int sleepval = 15;
    const pid_t pid = fork();
    if(pid < 0) {
        // Without this check a failed fork was treated as "parent".
        perror("fork");
        return 1;
    }
    if(pid == 0) {
        for(int i=0;i<10;++i) {
            kill(getppid(), SIGUSR1);
            //If sleep is not used, signal SIGUSR1 will be handled one time in parent
            //as other signals will be ignored while SIGUSR1 is being handled.
            sleep(1);
        }
        return 0;
    }

    while(sleepval) {
        // sleep() returns the unslept seconds when a signal interrupts it.
        sleepval = sleep(sleepval);
        switch(wsig) {
        case SIGSEGV:
            printf("Segmenation fault in thread: %d\n", wtid);
            exit(1);
            break;
        case SIGFPE:
            printf("Floating point exception in thread: %d\n", wtid);
            exit(1);
            break;
        case SIGUSR1:
            printf("User 1 signal in thread: %d\n", wtid);
            break;
        default:
            printf("Unhandled signal: %d in thread: %d\n", wsig, wtid);
            break;
        }
    }

    // Deliberate divide-by-zero to provoke SIGFPE; volatile keeps the
    // compiler from folding or dropping the division.
    volatile int c = 0;
    volatile int b = 1/c; //send SIGFPE signal.
    (void)b;
    return 0;
}
Is there any rule of picking up the thread for signal handling on Linux and Mac OS X? What should I do so that signal got handled in secondary thread?
In the above program, I am not able to handle the signal in secondary thread. What is wrong with it?
I suggest you SIG_BLOCK the needed signals in the main thread (commented out now) and SIG_UNBLOCK them in the other threads (SIG_BLOCK now). Or you can spawn your threads and then SIG_BLOCK in the main thread, since spawned threads get their sigmask from the thread that creates them.
And sigsuspend's parameter is not the set of signals you want to wake up on, but the opposite: it is the mask of signals that stay blocked while waiting.
Is there any rule of picking up the thread for signal handling on Linux and Mac OS X?
There is, see Signal Generation and Delivery:
During the time between the generation of a signal and its delivery or acceptance, the signal is said to be pending. Ordinarily, this interval cannot be detected by an application. However, a signal can be blocked from delivery to a thread. If the action associated with a blocked signal is anything other than to ignore the signal, and if that signal is generated for the thread, the signal shall remain pending until it is unblocked, it is accepted when it is selected and returned by a call to the sigwait() function, or the action associated with it is set to ignore the signal. Signals generated for the process shall be delivered to exactly one of those threads within the process which is in a call to a sigwait() function selecting that signal or has not blocked delivery of the signal. If there are no threads in a call to a sigwait() function selecting that signal, and if all threads within the process block delivery of the signal, the signal shall remain pending on the process until a thread calls a sigwait() function selecting that signal, a thread unblocks delivery of the signal, or the action associated with the signal is set to ignore the signal. If the action associated with a blocked signal is to ignore the signal and if that signal is generated for the process, it is unspecified whether the signal is discarded immediately upon generation or remains pending.
Suppose I set up a signal handler for SIGABRT and meanwhile I have a thread that waits in sigwait() for SIGABRT to arrive (I have blocked SIGABRT in the other threads with pthread_sigmask).
So which one will be processed first ? Signal handler or sigwait() ?
[I am facing some issues that sigwait() is get blocked for ever. I am debugging it currently]
// Sketch of the program's start-up: block SIGABRT, then start the thread
// that waits for it, then start the remaining threads.
main()
{
sigset_t signal_set;
// Build a set that contains only SIGABRT.
sigemptyset(&signal_set);
sigaddset(&signal_set, SIGABRT);
// NOTE(review): this looks redundant with the pthread_sigmask() call below,
// and sigprocmask() is presumably unspecified in a multithreaded process - confirm.
sigprocmask(SIG_BLOCK, &signal_set, NULL);
// Don't deliver SIGABRT while running this thread and the threads it creates.
pthread_sigmask(SIG_BLOCK, &signal_set, NULL);
pthread_create(&tAbortWaitThread, NULL, WaitForAbortThread, NULL);
..
Create all other threads
...
}
/*
 * Dedicated thread that waits synchronously for SIGABRT.
 *
 * Every signal, SIGABRT included, is kept blocked in this thread: sigwait()
 * consumes a *pending* (blocked) signal, so the signal must not be unblocked
 * before the call or it can be delivered to a handler instead.
 *
 * Once SIGABRT has been received the other threads are notified through
 * TellAllThreadsWeAreGoingDown() and the thread lingers for 10 seconds.
 *
 * v is unused.  Always returns NULL; if sigwait() fails the thread returns
 * without notifying anyone.
 */
static void* WaitForAbortThread(void* v)
{
    sigset_t signal_set;
    int stat;
    int sig;

    (void)v;

    sigfillset( &signal_set);
    pthread_sigmask( SIG_BLOCK, &signal_set, NULL ); // Dont want any signals

    sigemptyset(&signal_set);
    sigaddset(&signal_set, SIGABRT); // Wait for SIGABRT only

    /* sigwait() returns 0 on success and an error number on failure; it
     * never returns -1. */
    stat = sigwait( &signal_set, &sig );
    if (stat != 0)
    {
        return NULL;
    }

    TellAllThreadsWeAreGoingDown();
    sleep(10);
    return NULL;
}
// Abort signal handler executed via sigaction().
/*
 * SIGABRT handler: does nothing except stall for 20 seconds so the handler
 * does not return while the other threads are shutting down.  All three
 * arguments are ignored.
 */
static void CatchAbort(int i, siginfo_t* info, void* v)
{
    (void)i;
    (void)info;
    (void)v;

    sleep(20);
}
Here, at sigwait(), I will come to know that SIGABRT has been received. I will tell the other threads about it, and then hold the abort signal handler so that the process is not terminated.
I wanted to know the interaction of sigwait() and the signal handler.
From sigwait() documentation :
The sigwait() function suspends execution of the calling thread until
one of the signals specified in the signal set becomes pending.
A pending signal means a blocked signal waiting to be delivered to one of the thread/process. Therefore, you need not to unblock the signal like you did with your pthread_sigmask(SIG_UNBLOCK, &signal_set, NULL) call.
This should work :
/*
 * Waits for SIGABRT with sigwait() and then notifies the other threads.
 *
 * The signal mask is not touched here: SIGABRT is expected to be blocked
 * already in this thread (inherited from the creating thread), which is what
 * sigwait() requires.
 *
 * v is unused.  Always returns NULL; if sigwait() fails the thread returns
 * without notifying anyone.
 */
static void* WaitForAbortThread(void* v){
    sigset_t signal_set;
    int sig;

    (void)v;

    sigemptyset(&signal_set);
    sigaddset(&signal_set, SIGABRT);

    /* sigwait() returns 0 on success and an error number on failure */
    if (sigwait( &signal_set, &sig ) != 0)
        return NULL;

    TellAllThreadsWeAreGoingDown();
    sleep(10);
    return NULL;
}
I got some information from this <link>
It says :
To allow a thread to wait for asynchronously generated signals, the threads library provides the sigwait subroutine. The sigwait subroutine blocks the calling thread until one of the awaited signals is sent to the process or to the thread. There must not be a signal handler installed on the awaited signal using the sigwait subroutine.
I will remove the sigaction() handler and try only sigwait().
From the code snippet you've posted, it seems you got the use of sigwait() wrong. AFAIU, you need WaitForAbortThread like below:
sigemptyset( &signal_set); // change it from sigfillset()
for (;;) {
stat = sigwait(&signal_set, &sig);
if (sig == SIGABRT) {
printf("here's sigbart.. do whatever you want.\n");
pthread_kill(tid, signal); // thread id and signal
}
}
I don't think pthread_sigmask() is really needed. Since you only want to handle SIGABRT, first init signal_set as empty then simply add SIGABRT, then jump into the infinite loop, sigwait will wait for the particular signal that you're looking for, you check the signal if it's SIGABRT, if yes - do whatever you want. NOTE the uses of pthread_kill(), use it to sent any signal to other threads specified via tid and the signal you want to sent, make sure you know the tid of other threads you want to sent signal. Hope this will help!
I know this question is about a year old, but I often use a pattern, which solves exactly this issue using pthreads and signals. It is a little length but takes care of any issues I am aware of.
I recently used in combination with a library wrapped with SWIG and called from within Python. An annoying issue was that my IRQ thread waiting for SIGINT using sigwait never received the SIGINT signal. The same library worked perfectly when called from Matlab, which didn't capture the SIGINT signal.
The solution was to install a signal handler
#define _NTHREADS 8
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <linux/unistd.h>
#include <sys/signal.h>
#include <sys/syscall.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h> // strerror
/* CallErr(fun, (args)): calls fun(args) and, when the result is negative,
 * reports the failure through FailErr using the stringified text of `fun`.
 * NOTE(review): pthread_* functions presumably report errors as positive
 * error numbers, which this "< 0" test would not catch - confirm. */
#define CallErr(fun, arg) { if ((fun arg)<0) \
FailErr(#fun) }
/* CallErrExit(fun, (args), ret): like CallErr, but on failure also returns
 * `ret` from the enclosing function (via FailErrExit). */
#define CallErrExit(fun, arg, ret) { if ((fun arg)<0) \
FailErrExit(#fun,ret) }
/* FailErrExit(msg, ret): prints msg with errno and its strerror() text to
 * stderr, flushes stderr, then returns `ret` from the enclosing function. */
#define FailErrExit(msg,ret) { \
(void)fprintf(stderr, "FAILED: %s(errno=%d strerror=%s)\n", \
msg, errno, strerror(errno)); \
(void)fflush(stderr); \
return ret; }
/* FailErr(msg): prints msg with errno and its strerror() text to stderr and
 * flushes stderr; execution then continues. */
#define FailErr(msg) { \
(void)fprintf(stderr, "FAILED: %s(errno=%d strerror=%s)\n", \
msg, errno, strerror(errno)); \
(void)fflush(stderr);}
typedef struct thread_arg {
int cpu_id;
int thread_id;
} thread_arg_t;
static jmp_buf jmp_env;
static struct sigaction act;
static struct sigaction oact;
size_t exitnow = 0;
pthread_mutex_t exit_mutex;
pthread_attr_t attr;
pthread_t pids[_NTHREADS];
pid_t tids[_NTHREADS+1];
static volatile int status[_NTHREADS]; // 0: suspended, 1: interrupted, 2: success
sigset_t mask;
static pid_t gettid( void );
static void *thread_function(void *arg);
static void signalHandler(int);
/*
 * Starts _NTHREADS worker threads that have SIGINT blocked, handles SIGINT in
 * the main thread only, and joins the workers either after they finish on
 * their own or after signalHandler() has jumped back to the setjmp() point.
 *
 * Returns 0 on the normal path and on the interrupted path taken by the main
 * thread, -1 if the jump is taken by a thread other than the main one.
 */
int main() {
cpu_set_t cpuset;
int nproc;
int i;
thread_arg_t thread_args[_NTHREADS];
int id;
// Count the CPUs this thread may run on; workers are spread over them.
CPU_ZERO( &cpuset );
CallErr(sched_getaffinity,
(gettid(), sizeof( cpu_set_t ), &cpuset));
nproc = CPU_COUNT(&cpuset);
// Give each worker a CPU (round-robin) and an index; mark it as not finished.
for (i=0 ; i < _NTHREADS ; i++) {
thread_args[i].cpu_id = i % nproc;
thread_args[i].thread_id = i;
status[i] = 0;
}
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
pthread_mutex_init(&exit_mutex, NULL);
// We pray for no locks on buffers and setbuf will work, if not we
// need to use filelock() on on FILE* access, tricky
setbuf(stdout, NULL);
setbuf(stderr, NULL);
// Describe the SIGINT handler (installed further down) and build the
// one-signal mask used to block/unblock SIGINT.
// NOTE(review): SA_NOCLDSTOP / SA_NOCLDWAIT presumably only matter for
// SIGCHLD - confirm they are intended for a SIGINT handler.
act.sa_flags = SA_NOCLDSTOP | SA_NOCLDWAIT;
act.sa_handler = signalHandler;
sigemptyset(&act.sa_mask);
sigemptyset(&mask);
sigaddset(&mask, SIGINT);
// setjmp() returns 0 now; it returns non-zero when signalHandler() calls
// longjmp(jmp_env, 1), which resumes execution inside this block.
if (setjmp(jmp_env)) {
if (gettid()==tids[0]) {
// Main Thread
printf("main thread: waiting for clients to terminate\n");
for (i = 0; i < _NTHREADS; i++) {
CallErr(pthread_join, (pids[i], NULL));
// status 1 is set by a worker that saw the exit flag
if (status[i] == 1)
printf("thread %d: terminated\n",i+1);
}
// On linux this can be done immediate after creation
CallErr(pthread_attr_destroy, (&attr));
CallErr(pthread_mutex_destroy, (&exit_mutex));
return 0;
}
else {
// Should never happen
printf("worker thread received signal");
}
return -1;
}
// Install handler
CallErr(sigaction, (SIGINT, &act, &oact));
// Block SIGINT
CallErr(pthread_sigmask, (SIG_BLOCK, &mask, NULL));
// Remember the main thread's id; the jump target above compares against it.
tids[0] = gettid();
srand ( time(NULL) );
for (i = 0; i < _NTHREADS; i++) {
// Workers are created while SIGINT is blocked here, so they start with it blocked
CallErr(pthread_create,
(&pids[i], &attr, thread_function,
(void *)&thread_args[i]));
}
// Unblock SIGINT in the main thread only, now that all workers exist.
// NOTE(review): the message below says "block" although this call unblocks.
if (pthread_sigmask(SIG_UNBLOCK, &mask, NULL)) {
fprintf(stderr, "main thread: can't block SIGINT");
}
printf("Infinite loop started - CTRL-C to exit\n");
// Normal path: wait for every worker; status 2 means it ran to completion.
for (i = 0; i < _NTHREADS; i++) {
CallErr(pthread_join, (pids[i], NULL));
//printf("%d\n",status[i]);
if (status[i] == 2)
printf("thread %d: finished succesfully\n",i+1);
}
// Clean up and exit
CallErr(pthread_attr_destroy, (&attr));
CallErr(pthread_mutex_destroy, (&exit_mutex));
return 0;
}
static void signalHandler(int sig) {
int i;
pthread_t id;
id = pthread_self();
for (i = 0; i < _NTHREADS; i++)
if (pids[i] == id) {
// Exits if worker thread
printf("Worker thread caught signal");
break;
}
if (sig==2) {
sigaction(SIGINT, &oact, &act);
}
pthread_mutex_lock(&exit_mutex);
if (!exitnow)
exitnow = 1;
pthread_mutex_unlock(&exit_mutex);
longjmp(jmp_env, 1);
}
void *thread_function(void *arg) {
cpu_set_t set;
thread_arg_t* threadarg;
int thread_id;
threadarg = (thread_arg_t*) arg;
thread_id = threadarg->thread_id+1;
tids[thread_id] = gettid();
CPU_ZERO( &set );
CPU_SET( threadarg->cpu_id, &set );
CallErrExit(sched_setaffinity, (gettid(), sizeof(cpu_set_t), &set ),
NULL);
int k = 8;
// While loop waiting for exit condition
while (k>0) {
sleep(rand() % 3);
pthread_mutex_lock(&exit_mutex);
if (exitnow) {
status[threadarg->thread_id] = 1;
pthread_mutex_unlock(&exit_mutex);
pthread_exit(NULL);
}
pthread_mutex_unlock(&exit_mutex);
k--;
}
status[threadarg->thread_id] = 2;
pthread_exit(NULL);
}
/*
 * Returns the result of the gettid system call for the calling thread.
 * A negative result is reported through CallErr and still returned.
 */
static pid_t gettid( void ) {
pid_t pid;
// The call is split across CallErr's two arguments so that it expands to
// (pid = syscall (__NR_gettid)) < 0.  The macro stringifies its first
// argument, so the failure message reads "pid = syscall" and depends on this
// local's name.
CallErr(pid = syscall, (__NR_gettid));
return pid;
}
I ran several tests; the combinations and results are:
For all test cases, I register a signal handler by calling sigaction in the main thread.
main thread block target signal, thread A unblock target signal by calling pthread_sigmask, thread A sleep, send target signal.
result: signal handler is executed in thread A.
main thread block target signal, thread A unblock target signal by calling pthread_sigmask, thread A calls sigwait, send target signal.
result: sigwait is executed.
main thread does not block target signal, thread A does not block target signal, thread A calls sigwait, send target signal.
result: main thread is chosen and the registered signal handler is executed in the main thread.
As you can see, combinations 1 and 2 are easy to understand and draw a conclusion from.
It is:
If a signal is blocked by a thread, then the process-wide signal handler registered by sigaction just can't catch or even know it.
If a signal is not blocked, and it's sent before calling sigwait, the process-wide signal handler wins. And that's why APUE the books require us to block the target signal before calling sigwait. Here I use sleep in thread A to simulate a long "window time".
If a signal is not blocked, and it's sent when sigwait has already been waiting, sigwait wins.
But you should notice that for test case 1 and 2, main thread is designed to block the target signal.
At last for test case 3, when main thread is not blocked the target signal, and sigwait in thread A is also waiting, the signal handler is executed in the main thread.
I believe the behaviour of test case 3 is what APUE talks about:
From APUE §12.8:
If a signal is being caught (the process has established a signal
handler by using sigaction, for example) and a thread is waiting for
the same signal in a call to sigwait, it is left up to the
implementation to decide which way to deliver the signal. The
implementation could either allow sigwait to return or invoke the
signal handler, but not both.
Above all, if you want to accomplish one thread <-> one signal model, you should:
block all signals in the main thread with pthread_sigmask (threads subsequently created by the main thread inherit its signal mask)
create threads and call sigwait(target_signal) with target signal.
test code
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
FILE* file;
/*
 * Test thread: logs its pthread id, logs "1", waits in sigwait() for
 * SIGUSR1, logs what it received and finally logs "2".
 *
 * All output goes to the global `file`.  argv is unused.
 * Returns NULL; exits the process with status 1 if sigwait() fails.
 */
void* threadA(void* argv){
    sigset_t m;
    int signo;
    int err;

    (void)argv;

    /* pthread_t is not a long; convert explicitly to match %ld */
    fprintf(file, "%ld\n", (long)pthread_self());

    sigemptyset(&m);
    sigaddset(&m, SIGUSR1);

    // Variant for the "signal arrives before sigwait" experiment:
    // sigset_t q;
    // sigemptyset(&q);
    // pthread_sigmask(SIG_SETMASK, &q, NULL);
    // sleep(50);

    fprintf(file, "1\n");
    err = sigwait(&m, &signo);
    if (err != 0){
        fprintf(file, "sigwait error\n");
        exit(1);
    }

    switch (signo)
    {
    case SIGUSR1:
        fprintf(file, "SIGUSR1 received\n");
        break;
    default:
        fprintf(file, "?\n");
        break;
    }
    fprintf(file, "2\n");

    /* a thread start routine must return a value */
    return NULL;
}
/*
 * SIGUSR1 handler for the test: logs the id of the thread it runs on and
 * then "hello" to the global `file`, which shows which thread handled the
 * signal.  signo is unused.
 *
 * NOTE(review): fprintf() is not async-signal-safe; it is kept because this
 * program exists only to observe which thread handles the signal.
 */
void hello(int signo){
    (void)signo;

    /* pthread_t is not a long; convert explicitly to match %ld */
    fprintf(file, "%ld\n", (long)pthread_self());
    fprintf(file, "hello\n");
}
int main(){
file = fopen("daemon", "wb");
setbuf(file, NULL);
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_handler = hello;
sigaction(SIGUSR1, &sa, NULL);
sigset_t n;
sigemptyset(&n);
sigaddset(&n, SIGUSR1);
// pthread_sigmask(SIG_BLOCK, &n, NULL);
pthread_t pid;
int err;
err = pthread_create(&pid, NULL, threadA, NULL);
if(err != 0){
fprintf(file, "create thread error\n");
exit(1);
}
pause();
fprintf(file, "after pause\n");
fclose(file);
return 0;
}
Run it with ./a.out & (in the background), and use kill -SIGUSR1 pid to test. Do not use raise: raise, sleep and pause are thread-wide.
What I'm trying to do is have a child process run in the background while the parent goes and does something else. When the child returns, I'd like for the parent to get that status and do something with it. However, I don't want to explicitly wait for the child at any point.
I looked into the WNOHANG option of waitpid but this seems to be a little different than what I'm looking for. WNOHANG just only gets the status if it's done, otherwise it moves on. Ideally, I'd like some option that will not wait for the process, but will jump back and grab the return status when it's done.
Is there any way to do this?
Code example:
// Desired usage: the parent registers interest in the child's exit status
// and carries on without blocking.
pid_t p = fork();
if (p == 0){
// Child: do the work and hand the result back as the return value.
value = do_child_stuff();
return(value);
}
if (p > 0){
// Parent: p is the child's pid.
captureStatus(p, &status); //NOT A REAL FUNCTION
// captureStatus will put p's exit status in status
// whenever p returns, without waiting or pausing for p
//do other stuff.....
}
Is there any way to simulate the behavior of captureStatus?
You could establish a signal handler for SIGCHLD and wait for the process once that triggers (it will trigger when the child terminates or is killed).
However, be aware that very few useful things can be done in a signal handler. Everything must be async-signal-safe. The standard specifically mentions wait and waitpid as safe.
Here's the proper way (or at least one proper way) to create an asynchronous wait out of a synchronous one:
/* Argument bundle passed from waitpid_async() to its worker thread. */
struct waitpid_async_args {
    pid_t pid;     /* process to wait for */
    int *status;   /* destination for the waitpid() status word */
    int flags;     /* options forwarded to waitpid() */
    sem_t sem;     /* posted by the worker once it has copied its arguments */
    sem_t *done;   /* posted by the worker after waitpid() has returned */
    int *err;      /* receives errno if waitpid() fails, 0 otherwise */
};
/*
 * Worker thread for waitpid_async(): performs the blocking waitpid() and
 * reports the outcome through the caller-supplied pointers.
 *
 * arg: pointer to a struct waitpid_async_args that is only valid until
 *      a->sem has been posted.
 *
 * Stores errno in *err when waitpid() fails and 0 otherwise, then posts the
 * "done" semaphore and detaches itself. Always returns NULL.
 */
static void *waitpid_async_start(void *arg)
{
    struct waitpid_async_args *a = arg;
    pid_t pid = a->pid;
    int *status = a->status, flags = a->flags, *err = a->err;
    /*
     * Copy the completion semaphore too: the struct lives on the creator's
     * stack and may be gone as soon as a->sem is posted, so nothing may be
     * read through 'a' after the handshake below.
     */
    sem_t *done = a->done;

    sem_post(&a->sem);

    if (waitpid(pid, status, flags) < 0) *err = errno;
    else *err = 0;

    sem_post(done);
    pthread_detach(pthread_self());
    return NULL;
}
int waitpid_async(pid_t pid, int *status, int flags, sem_t *done, int *err)
{
struct waitpid_async_args a = { .pid = pid, .status = status,
.flags = flags, .done = done, .err = err };
sigset_t set;
pthread_t th;
int ret;
sem_init(&a.sem, 0, 0);
sigfillset(&set);
pthread_sigmask(SIG_BLOCK, &set, &set);
ret = pthread_create(&th, 0, waidpid_async_start, &a);
if (!ret) sem_wait(&a.sem);
pthread_sigmask(SIG_SETMASK, &set, 0);
return ret;
}
Note that the asynchronous function takes as an extra argument a semaphore it will post to flag that it's done. You could just examine status, but without a synchronization object there's no formal guarantee of memory ordering, so it's better to use an approach like this and call sem_trywait or sem_timedwait with a timeout to check whether the status is available yet before accessing it.
You can use shared memory IPC functions like shmget and shmat to communicate between the two processes. This can be non-blocking and may work well for producer/consumer models like the one you are describing. You will still have to poll, though.
shmget should be called before the fork to create the shared memory block.
Then you can use shmat after the fork to get a pointer to the shared memory and just use it as a buffer from there on.
When finished call shmdt on both child and parent to detach, and shmctl to remove the shared memory.
There is an example on the web here.
I like Vyktor's solution - in the parent process, start a thread to block on the child process and set the status variable when it's done. Here is an implementation:
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <pthread.h>
/* What the monitor thread waits for and where it reports the result. */
struct return_monitor {
    pid_t pid;            /* child process to wait for */
    int *return_status;   /* destination for the child's exit code */
};

/*
 * Thread entry point: blocks in waitpid() on monitor->pid and, if the child
 * exited normally, stores its exit code in *monitor->return_status.
 *
 * arg: pointer to a struct return_monitor that must outlive the thread.
 *
 * Returns NULL. *return_status is left untouched when waitpid() fails or
 * the child did not terminate via exit/return (e.g. it was killed by a
 * signal), so the caller's initial sentinel value stays visible.
 */
static void *
captureStatus(void *arg)
{
    /* pthread_create() requires a void *(*)(void *) start routine. */
    struct return_monitor *monitor = arg;
    int wait_status;

    printf("Monitor thread started...\n");
    if (waitpid(monitor->pid, &wait_status, 0) == monitor->pid
        && WIFEXITED(wait_status)) {
        /* WEXITSTATUS is only meaningful after WIFEXITED succeeded. */
        *(monitor->return_status) = WEXITSTATUS(wait_status);
    }
    return NULL;
}
/*
 * Demo: forks a child that counts for ten seconds and returns 3, while the
 * parent keeps working and periodically prints the exit code collected by
 * a background monitor thread (-1 until the child has been reaped).
 *
 * Returns 0 in the parent on success, 1 if fork() or pthread_create()
 * fails; the child returns 3.
 *
 * NOTE(review): child_return_status is written by the monitor thread and
 * read here without synchronisation; fine for a demo, but real code should
 * use an atomic or a semaphore.
 */
int
main()
{
    printf("Parent process started...\n");
    /* Flush first so buffered output is not duplicated into the child. */
    fflush(stdout);

    pid_t p = fork();
    if (p < 0) {
        perror("fork");
        return 1;
    }
    if (p == 0){
        /* child */
        printf("Child process started...\n");
        int i;
        for (i = 0; i < 10; ++i) {
            sleep(1);
            printf("Child iteration %d...\n", i);
        }
        /* arbitrary return value that will be recognizable in parent */
        return(3);
    }

    /* parent */
    int child_return_status = -1;
    struct return_monitor monitor = {p, &child_return_status};
    pthread_t monitor_thread;
    if (pthread_create(&monitor_thread, NULL, captureStatus, &monitor) != 0) {
        fprintf(stderr, "pthread_create failed\n");
        return 1;
    }
    int i;
    for (i = 0; i < 10; ++i) {
        sleep(2);
        printf("Parent process iteration %d (Child return status %d)...\n",
               i, child_return_status);
    }
    /* 'monitor' lives on this stack frame, so the thread must finish first. */
    pthread_join(monitor_thread, NULL);
    return 0;
}
I am confused about use of multiple mutexes in C.
/*
 * Question code: starts one thread while holding mutex1 and prints two
 * parent messages. It produces the "Parent 1, Parent 2, Child 1, child 2"
 * ordering described in the accompanying text.
 */
int main() {
pthread_t thread1;
char *message1 = "Thread 1";
int r;
pthread_mutex_init(&mutex1, NULL);
pthread_mutex_init(&mutex2, NULL);
// mutex1 is taken before the thread exists and held until after "Parent 2".
pthread_mutex_lock(&mutex1);
// r holds pthread_create()'s return code, not a value produced by the thread.
r = pthread_create( &thread1, NULL, print_message_function, (void*) message1);
printf("Parent 1\n");
pthread_mutex_lock(&mutex2);
printf("Parent 2\n");
pthread_mutex_unlock(&mutex2);
// Only now can the child get past its first pthread_mutex_lock(&mutex1).
pthread_mutex_unlock(&mutex1);
pthread_join( thread1, NULL);
printf("Thread 1 returns: %d\n",r);
return 0;
}
/*
 * Thread body for the question code: takes mutex1, prints the message
 * passed in str, then prints "child 2" while also holding mutex2.
 * Returns NULL.
 */
void *print_message_function( void *str ) {
// main() locks mutex1 before creating this thread, so this call blocks
// until main() unlocks it.
pthread_mutex_lock(&mutex1);
char *message;
message = (char *) str;
printf("Child 1 received message: %s \n", message);
pthread_mutex_lock(&mutex2);
printf("child 2\n");
pthread_mutex_unlock(&mutex2);
pthread_mutex_unlock(&mutex1);
return NULL;
}
output is
Parent 1
Parent 2
Child 1 received message: Thread 1
child 2
Thread 1 returns: 0
what i want is
Parent 1
Child 1 received message: Thread 1
Parent 2
child 2
Thread 1 returns: 0
When you call pthread_create you have already locked mutex1. That means that every other thread that calls pthread_mutex_lock(&mutex1); will wait for the mutex to be unlocked. That is what happens when you create the second thread: mutex1 is already locked, so the second thread cannot enter the critical section and has to wait for the mutex to be unlocked. That only happens at the end of the main function.
You'll need to reorganize your code to get the output you desire.
However, to obtain such a result you should look into synchronization mechanisms such as semaphores or condition variables; they provide a clearer and easier way to synchronize threads.
You may also check this tutorial: POSIX Threads Programming
A simple solution using semaphores (not tested, but it should work):
#include <stdio.h>
#include <semaphore.h>
sem_t sem1, sem2;
/*
 * Child thread body. Each print waits for one post on sem1; after the
 * first message sem2 is posted to hand control back to the parent.
 *
 * str: NUL-terminated message to print.
 * Returns NULL.
 */
void* f(void* str) {
    const char* text = (const char*)str;

    /* Step 1: wait for the parent's go-ahead, then print the message. */
    sem_wait(&sem1);
    printf("Child 1 received message: %s \n", text);

    /* Step 2: give the parent its turn and wait to be released again. */
    sem_post(&sem2);
    sem_wait(&sem1);

    fputs("Child 2\n", stdout);
    return NULL;
}
/*
 * Parent side of the semaphore hand-off. Together with f() it produces the
 * order: Parent 1, Child 1, Parent 2, Child 2.
 *
 * argc, argv: unused.
 * Returns 0 on success, 1 if the thread could not be created.
 */
int main (int argc, const char * argv[]) {
    pthread_t thread;
    char* message = "Thread 1";
    int r;

    (void)argc;
    (void)argv;

    sem_init(&sem1,0,0);
    sem_init(&sem2,0,0);

    r = pthread_create(&thread, NULL, f, (void*)message);
    if (r != 0) {
        /* Without a child nobody would post sem2 and sem_wait would hang. */
        fprintf(stderr, "pthread_create failed: %d\n", r);
        return 1;
    }

    /* The requested output starts with this line. */
    printf("Parent 1\n");
    sem_post(&sem1);            /* let the child print its first message */
    sem_wait(&sem2);            /* wait until it has done so */
    printf("Parent 2\n");
    sem_post(&sem1);            /* let the child print its second message */

    /* Join the thread that was actually created ('thread', not 'thread1'). */
    pthread_join(thread, NULL);
    printf("Thread 1 returns: %d\n",r);

    sem_destroy(&sem1);
    sem_destroy(&sem2);
    return 0;
}
Mutexes alone aren't suitable for performing the kind of closely-interlocked execution that you want - their normal use is for protecting access to a shared data structure. This is because they're designed for saying "Thing A shouldn't happen at the same time as Thing B", but they don't say anything about whether Thing A or Thing B happens first or second.
You could use mutexes and condition variables, but in this case your problem is most closely matched by a pthreads Barrier object:
#include <stdio.h>
#include <pthread.h>
pthread_barrier_t barrier;
/*
 * Child thread body for the barrier-based hand-off. It prints its two
 * messages around three rendezvous points shared with main().
 *
 * str: NUL-terminated message to print.
 * Returns NULL.
 */
void *print_message_function( void *str )
{
    const char *text = (const char *) str;

    /* Rendezvous 1: the parent has printed its first message. */
    pthread_barrier_wait(&barrier);
    printf("Child 1 received message: %s \n", text);

    /* Rendezvous 2: release the parent so it can print its second message. */
    pthread_barrier_wait(&barrier);

    /* Rendezvous 3: the parent has printed its second message. */
    pthread_barrier_wait(&barrier);
    fputs("child 2\n", stdout);

    return NULL;
}
/*
 * Parent side of the barrier-based hand-off: interleaves its two messages
 * with the child's by meeting it at three barrier points.
 *
 * Returns 0 on success, 1 if the child thread could not be created.
 */
int main()
{
    pthread_t thread1;
    char *message1 = "Thread 1";
    int r;

    /* Two participants: this thread and the child. */
    pthread_barrier_init(&barrier, NULL, 2);

    r = pthread_create( &thread1, NULL, print_message_function, (void*) message1);
    if (r != 0) {
        /* With no second participant the first barrier wait would never return. */
        fprintf(stderr, "pthread_create failed: %d\n", r);
        pthread_barrier_destroy(&barrier);
        return 1;
    }

    printf("Parent 1\n");

    pthread_barrier_wait(&barrier); /* Barrier point 1 */
        /* (allow child to proceed and print first message) */

    pthread_barrier_wait(&barrier); /* Barrier point 2 */
        /* (wait for child to print first message) */

    printf("Parent 2\n");

    pthread_barrier_wait(&barrier); /* Barrier point 3 */
        /* (allow child to proceed and print second message) */

    pthread_join( thread1, NULL);
        /* (wait for child to exit) */

    printf("Thread 1 returns: %d\n",r);

    pthread_barrier_destroy(&barrier);
    return 0;
}
Note that it is not usual to try to tightly interlock the execution of threads in this way - really, you've gone to great pains to ensure that the threads don't execute in parallel at all, which is the whole point of threads in the first place. If you find yourself doing this in a real project, it's a sign that you ought to carefully re-think your design.
I think you need to unlock mutex1 sooner. You unlock it after the printf("Parent 2\n"); so thread1 is still locked waiting for pthread_mutex_lock(&mutex1);.
When thread1 starts, its first step is to block while it waits for the mutual-exclusion lock (the clue's in the name) on mutex1. So it's paused.
Then you :
printf("Parent 1\n");
pthread_mutex_lock(&mutex2); <-- lock 2 is unleased but thread one is waiting on mutex1
printf("Parent 2\n");