Stack unwinding on HP-UX and Linux - c

I need to get the stack information of my C application in certain points. I've read the documentation and searched the Net but still cannot figure out how I can do it. Can you point to a simple process explanation? Or, even better, to an example of stack unwinding. I need it for HP-UX (Itanium) and Linux.

Check out linux/stacktrace.h
Here is an API reference:
http://www.cs.cmu.edu/afs/cs/Web/People/tekkotsu/dox/StackTrace_8h.html
Should work on all Linux kernels
Here is an alternative example in C from
http://www.linuxjournal.com/article/6391
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the current call stack to stdout, one "[bt] "-prefixed line per frame.
 *
 * At most MAX_FRAMES return addresses are captured with backtrace() and
 * symbolized with backtrace_symbols().  The symbol array is heap-allocated by
 * backtrace_symbols() and is released here before returning.  If
 * symbolization fails, the raw addresses are printed instead.
 */
void show_stackframe() {
    enum { MAX_FRAMES = 16 };
    void *trace[MAX_FRAMES];
    int trace_size = backtrace(trace, MAX_FRAMES);
    char **messages = backtrace_symbols(trace, trace_size);
    int i;

    printf("[bt] Execution path:\n");
    if (messages == NULL) {
        /* No symbol strings available: fall back to the bare addresses. */
        for (i = 0; i < trace_size; ++i) {
            printf("[bt] %p\n", trace[i]);
        }
        return;
    }
    for (i = 0; i < trace_size; ++i) {
        printf("[bt] %s\n", messages[i]);
    }
    /* Only the array itself is freed; the strings live inside the same block. */
    free(messages);
}
/*
 * Dump the call stack, then return twice the difference p1 - p2.
 * Used by test() for arguments below 10.
 */
int func_low(int p1, int p2) {
    int diff = p1 - p2;

    show_stackframe();
    return diff * 2;
}
/*
 * Dump the call stack, then return twice the sum p1 + p2.
 * Used by test() for arguments of 10 or more.
 */
int func_high(int p1, int p2) {
    int sum = p1 + p2;

    show_stackframe();
    return sum * 2;
}
/*
 * Route p1 through func_low() (p1 < 10) or func_high() (otherwise), passing
 * 2*p1 as the second argument, and return the callee's result plus 5.
 */
int test(int p1) {
    int doubled = 2 * p1;

    if (p1 < 10) {
        return 5 + func_low(p1, doubled);
    }
    return 5 + func_high(p1, doubled);
}
/*
 * Demo driver: exercise both branches of test() so that two different call
 * stacks are printed, each followed by the computed value.
 */
int main() {
    int first = test(27);   /* takes the func_high() path */
    printf("First call: %d\n\n", first);

    int second = test(4);   /* takes the func_low() path */
    printf("Second call: %d\n", second);

    return 0;
}

You want to look at libunwind - this is a cross-platform library developed originally by HP for unwinding Itanium stack traces (which are particularly complex); but has subsequently been expanded to many other platforms; including both x86-Linux and Itanium-HPUX.
From the libunwind(3) man page; here is an example of using libunwind to write a typical 'show backtrace' function:
#define UNW_LOCAL_ONLY
#include <libunwind.h>
/*
 * Walk the calling thread's stack with libunwind and print the instruction
 * pointer and stack pointer of every frame that unw_step() reaches.
 */
void show_backtrace (void) {
    unw_context_t context;
    unw_cursor_t frame;
    unw_word_t ip, sp;

    /* Snapshot the current registers and start a cursor from them. */
    unw_getcontext(&context);
    unw_init_local(&frame, &context);

    /* unw_step() yields a positive value while another frame is available. */
    for (;;) {
        if (unw_step(&frame) <= 0) {
            break;
        }
        unw_get_reg(&frame, UNW_REG_IP, &ip);
        unw_get_reg(&frame, UNW_REG_SP, &sp);
        printf ("ip = %lx, sp = %lx\n", (long) ip, (long) sp);
    }
}

This should work for HP-UX on Itanium:
http://docs.hp.com/en/B9106-90012/unwind.5.html
For simple stack trace, try U_STACK_TRACE().

Related

On ARM macOS when explicitly raise()-ing a signal, some return addresses are garbled on the stack

Here's a simple program for ARM macOS that installs a signal handler for SIGSEGV, then generates one. In the signal handler function, the stack is walked with the usual frame pointer chasing algorithm, then the symbolized version is printed out:
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <execinfo.h>
#include <stdlib.h>
// Signal handler: walk the interrupted thread's frame-pointer chain, print a
// symbolized call stack to stdout, then abort the process.
//
//   signum  - signal number (unused)
//   siginfo - signal details (unused)
//   context - ucontext of the interrupted thread; the saved pc and fp are
//             read from its machine context
void handler(int signum, siginfo_t* siginfo, void* context)
{
    (void) signum;
    (void) siginfo;

    enum { kMaxFrames = 100 };

    __darwin_ucontext* ucontext = (__darwin_ucontext*) context;
    __darwin_mcontext64* machineContext = ucontext->uc_mcontext;
    uint64_t programCounter = machineContext->__ss.__pc;
    uint64_t framePointer = machineContext->__ss.__fp;

    void* bt[kMaxFrames];
    int n = 0;

    // Each frame record holds the caller's fp at offset 0 and the return
    // address at offset 8.  Stop at the end of the chain, or when bt is full,
    // so that a very deep (or corrupted) chain cannot overflow the buffer.
    while (framePointer != 0 && n < kMaxFrames) {
        bt[n] = (void*)programCounter;
        programCounter = *(uint64_t*)(framePointer + 8);
        framePointer = *(uint64_t*)(framePointer);
        ++n;
    }

    char** symbols = backtrace_symbols(bt, n);
    printf ("Call stack:\n");
    for (int i = 0; i < n; ++i) {
        if (symbols != NULL) {
            printf ("\t %s\n", symbols[i]);
        } else {
            // Symbolization failed: still show the raw addresses.
            printf ("\t %p\n", bt[i]);
        }
    }
    free (symbols);
    abort ();
}
// Trigger the SIGSEGV handler by raising the signal explicitly rather than
// by performing an invalid memory access.
void Crash ()
{
    (void) raise (SIGSEGV);
    // A "real" fault would instead be: *(volatile int*)0 = 0;
}
// Install `handler` for SIGSEGV (with SA_SIGINFO so it receives the
// ucontext), then deliberately crash.
int main()
{
    // Value-initialize so that no field is left indeterminate, and start from
    // an explicitly empty blocked-signal mask.
    struct sigaction sigAction = {};
    sigemptyset (&sigAction.sa_mask);
    sigAction.sa_sigaction = handler;
    sigAction.sa_flags = SA_SIGINFO;
    sigaction (SIGSEGV, &sigAction, nullptr);
    Crash ();
}
This works fine when a "regular" SIGSEGV happens, but when the signal is raised explicitly, the return addresses on the stack seem garbled; specifically, their upper bits seem to contain garbage:
Call stack:
0 libsystem_kernel.dylib 0x0000000185510e68 __pthread_kill + 8
1 libsystem_c.dylib 0x116a000185422e14 raise + [...] // Should be 0x0000000185422e14
2 SignalHandlerTest 0x8f6a000104bc3eb8 _Z5Crashv + [...] // Should be 0x0000000104bc3eb8
3 SignalHandlerTest 0x0000000104bc3ef8 main + 56
4 libdyld.dylib 0x0000000185561450 start + 4
The behavior is the same regardless of which signal is raised. What am I missing?
As @Codo has correctly identified, this is PAC (pointer authentication).
The upper bits of the address are not garbled, but rather contain a salted hash of the register's lower bits.
And contrary to your claims, this happens with regular segfaults too. For example, calling fprintf(NULL, "a"); results in:
Call stack:
0 libsystem_c.dylib 0x000000019139d8a0 flockfile + 28
1 libsystem_c.dylib 0x1d550001913a5870 vfprintf_l + 2113595600120315944
2 libsystem_c.dylib 0x341c80019139efd0 fprintf + 3755016926808506440
3 t 0x5f29000100483e9c Crash + 6857011907648290844
4 t 0x0000000100483edc main + 56
5 libdyld.dylib 0x00000001914b1430 start + 4
This is because all system binaries, including libraries, are compiled for the arm64e ABI and will make use of PAC. Now, your binary is running as a regular old arm64 binary and would crash if it passed an unsigned function pointer to a library function, or got a signed one returned. So the kernel actually disables 3 of the 4 keys that your process can use (IA, IB, DA and DB). But one of those, IB, is used solely for stack frames and so that one is left enabled even in arm64 binaries.
The reason why some return addresses are still not signed though is:
The main + 56 and start + 4 were pushed by your code, which is arm64 and hence doesn't sign them.
The flockfile + 28 is the instruction that crashed, whose address was never pushed to the stack, but extracted from the thread state.
So everything's working exactly as it's supposed to.
Edit:
After attempting to use this to aid me in debugging myself, I find the PAC'ed addresses to be annoying after all. You commented about ptrauth_strip in ptrauth.h, but that will actually not work inside an arm64 process (it's aliased to a macro that does nothing), nor will __builtin_ptrauth_strip (the compiler will error out).
The compiler won't even let you use a raw xpaci instruction when targeting arm64, but nothing on the hardware level prevents the instruction from working, so you can still manually inject the opcode.
Based on this, I wrote a signal handler that properly strips PAC signatures from an arm64 process:
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <execinfo.h>
/*
 * xpaci(pc): return pc as a pointer, with any pointer-authentication
 * signature removed on arm64.  On other architectures the value is returned
 * unchanged.
 */
#ifdef __arm64__
extern void* xpaci(uint64_t pc);
/*
 * Hand-written definition of xpaci.  The strip instruction is emitted as a raw
 * opcode; it is expected to operate on x30, which is why the argument is
 * shuffled through that register.
 */
__asm__
(
"_xpaci:\n"
/* Save the return address in x1, because x30 is about to be overwritten. */
" mov x1, x30\n"
/* Place the argument in x30. */
" mov x30, x0\n"
" .4byte 0xd50320ff\n" // xpaclri
/* Move the result into the return register and return via the saved address. */
" mov x0, x30\n"
" ret x1\n"
);
#else
/* Fallback for other architectures: nothing to strip, just convert to a pointer. */
static inline void* xpaci(uint64_t pc)
{
return (void*)pc;
}
#endif
/*
 * Signal handler: print the interrupted thread's call stack to stderr and
 * terminate the process with exit(-1).
 *
 * The frame-pointer chain is walked twice: once to count the frames, so that
 * a buffer of exactly the right size can be allocated, and once to fill that
 * buffer.  Every address is passed through xpaci() before it is stored.
 *
 *   signum  - signal number (unused)
 *   siginfo - signal details (unused)
 *   ctx     - ucontext of the interrupted thread; the saved pc and fp are
 *             read from its machine context
 */
static void handler(int signum, siginfo_t *siginfo, void *ctx)
{
    (void)signum;
    (void)siginfo;

    _STRUCT_MCONTEXT64 *mctx = ((_STRUCT_UCONTEXT*)ctx)->uc_mcontext;
#ifdef __arm64__
    uint64_t orig_pc = mctx->__ss.__pc;
    uint64_t orig_fp = mctx->__ss.__fp;
#elif defined(__x86_64__)
    uint64_t orig_pc = mctx->__ss.__rip;
    uint64_t orig_fp = mctx->__ss.__rbp;
#else
#   error "Unknown arch"
#endif

    /* Pass 1: count frames.  Stop at a null pc or at the end of the chain. */
    uint64_t pc = orig_pc;
    uint64_t fp = orig_fp;
    size_t n = 0;
    while(1)
    {
        if(!xpaci(pc))
        {
            break;
        }
        ++n;
        if(!fp)
        {
            break;
        }
        pc = ((uint64_t*)fp)[1];
        fp = ((uint64_t*)fp)[0];
    }

    /* Nothing to symbolize; also avoids malloc(0), which may return NULL. */
    if(n == 0)
    {
        fprintf(stderr, "Caught signal with call stack:\n");
        exit(-1);
    }

    void **bt = malloc(n * sizeof(void*));
    if(!bt)
    {
        fprintf(stderr, "malloc: %s\n", strerror(errno));
        exit(-1);
    }

    /* Pass 2: same walk, this time recording the stripped addresses. */
    pc = orig_pc;
    fp = orig_fp;
    for(size_t i = 0; i < n; ++i)
    {
        bt[i] = xpaci(pc);
        if(!fp)
        {
            break;
        }
        pc = ((uint64_t*)fp)[1];
        fp = ((uint64_t*)fp)[0];
    }

    char **sym = backtrace_symbols(bt, n);
    fprintf(stderr, "Caught signal with call stack:\n");
    for(size_t i = 0; i < n; ++i)
    {
        if(sym)
        {
            fprintf(stderr, "%s\n", sym[i]);
        }
        else
        {
            /* Symbolization failed: fall back to the raw addresses. */
            fprintf(stderr, "%p\n", bt[i]);
        }
    }
    free(sym);
    free(bt);
    exit(-1);
}
It uses xpaclri rather than xpaci, since the former is a NOP on arm64 (non-arm64e) hardware while the latter would be undefined.

are arguments required to build pslib code?

I have pslib installed and in the latest version on an ubuntu system.
the library is installed at: "/usr/include/libps/pslib.h"
when I try compiling, the postscript PS objects are not recognized.
...
/usr/bin/ld: draw.c:(.text+0x1868): undefined reference to `PS_stroke'
...
and so on. I don't see anything on the pslib webpage about needing to name the library in the gcc build command.
what do I need to do to build C code with pslib? I am on Ubuntu Linux
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <locale.h>
#include <libps/pslib-mp.h>
/*
 * Allocation callback handed to PS_new2(): a thin wrapper around malloc().
 * The document handle and the caller tag are accepted but not used.
 */
void * my_malloc(PSDoc *p, size_t size, const char *caller) {
    (void) p;
    (void) caller;
    return malloc(size);
}
/*
 * Reallocation callback handed to PS_new2(): a thin wrapper around realloc().
 * The document handle and the caller tag are accepted but not used.
 */
void * my_realloc(PSDoc *p, void *mem, size_t size, const char *caller) {
    void *resized = realloc(mem, size);

    (void) p;
    (void) caller;
    return resized;
}
/*
 * Deallocation callback handed to PS_new2(): a thin wrapper around free().
 * The document handle is accepted but not used.
 */
void my_free(PSDoc *p, void *mem) {
    (void) p;
    free(mem);
}
/*
 * Write a one-page PostScript file, "polish.ps", containing two lines of
 * sample text set in the "plr10" font with ISO-8859-2 input encoding.
 *
 * Exits with status 1 if the document cannot be created or the output file
 * cannot be opened, and with status 0 otherwise.
 */
int main() {
    PSDoc *psdoc;
    int antiqua;
    float leftmargin = 100;

    PS_boot();

    psdoc = PS_new2(NULL, my_malloc, my_realloc, my_free, NULL);
    if (psdoc == NULL) {
        fprintf(stderr, "Cannot create PostScript document\n");
        PS_shutdown();
        exit(1);
    }
    /* NOTE(review): assumes PS_open_file() reports failure with a negative
     * value, as the pslib sample programs do - confirm against the headers. */
    if (PS_open_file(psdoc, "polish.ps") < 0) {
        fprintf(stderr, "Cannot open PostScript file\n");
        PS_delete(psdoc);
        PS_shutdown();
        exit(1);
    }

    /* Document metadata. */
    PS_set_info(psdoc, "Creator", __FILE__);
    PS_set_info(psdoc, "Author", "Uwe Steinmann");
    PS_set_info(psdoc, "Title", "Polish letters");
    PS_set_info(psdoc, "Keywords", "polish, latin2, iso-8859-1");
    PS_set_info(psdoc, "BoundingBox", "0 0 596 842");

    PS_set_parameter(psdoc, "inputencoding", "ISO-8859-2");
    PS_set_parameter(psdoc, "warning", "true");

    antiqua = PS_findfont(psdoc, "plr10", "", 1);

    /* Single 596 x 842 page with two text lines at the left margin. */
    PS_begin_page(psdoc, 596, 842);
    PS_setfont(psdoc, antiqua, 10.0);
    PS_set_value(psdoc, "leading", 15.0);
    PS_show_xy(psdoc, "±æê³ñ󶼿 ¡ÆÊ£ÑÓ¦¬¯", leftmargin, 100);
    PS_show_xy(psdoc, "><=!abc~_-", leftmargin, 200);
    PS_end_page(psdoc);

    /* Tear down in reverse order of creation. */
    PS_deletefont(psdoc, antiqua);
    PS_close(psdoc);
    PS_delete(psdoc);
    PS_shutdown();
    exit(0);
}
You need to use -lps when linking (or maybe -lps-mp).
This is specified in the documentation:
Programs which want to use pslib will have to include the header file libps/pslib.h and link against libps
The general rule is that -lXXX is used to link against the library named libXXX.

CaptureStackBackTrace inconsistencies using FramesToSkip

On Windows you can capture the stack trace using CaptureStackBackTrace as follows:
/* Capture the call stack in a single call, skipping no frames, into a buffer
 * with room for USHRT_MAX entries; framesCount receives the returned count. */
void* frames[USHRT_MAX];
USHORT framesCount = CaptureStackBackTrace(0, USHRT_MAX, frames, NULL);
However, capturing it by smaller chunks in a loop to avoid allocating a USHRT_MAX buffer doesn't provide the same result.
This code
#include <Windows.h>
#include <assert.h>
#include <stdio.h>
/*
 * Capture the call stack once in full, then again in chunks of CHUNK_FRAMES
 * using the FramesToSkip parameter, and report whether both approaches agree
 * in content and in total frame count.
 *
 * The chunked walk starts at offset 1, so frameOffset ends up equal to the
 * full count only if every frame after the first was delivered by a chunk.
 */
__declspec(noinline) void CheckStack(void)
{
    enum { CHUNK_FRAMES = 64 };

    printf("Checking stack...\n");
    void* entireStack[USHRT_MAX];
    USHORT frameCount = CaptureStackBackTrace(0, USHRT_MAX, entireStack, NULL);
    printf("Stack size is: %u\n", (unsigned)frameCount);

    ULONG frameOffset = 1;
    for (;;)
    {
        void* chunk[CHUNK_FRAMES];
        USHORT framesFound = CaptureStackBackTrace(frameOffset, CHUNK_FRAMES, chunk, NULL);
        if (!framesFound)
        {
            break;
        }
        /* Compare only the frames this call actually wrote: the tail of a
         * partially filled chunk is uninitialized.  A chunk reaching past
         * frameCount cannot match either, and must not be compared against
         * the unwritten part of entireStack. */
        if (frameOffset + framesFound > frameCount
            || memcmp(entireStack + frameOffset, chunk, framesFound * sizeof(chunk[0])) != 0)
        {
            printf("Incorrect content\n");
        }
        frameOffset += (ULONG)framesFound;
    }

    if (frameCount != frameOffset)
    {
        /* ULONG is an unsigned long, hence %lu. */
        printf("Incorrect count (%u != %lu)\n", (unsigned)frameCount, frameOffset);
    }
    printf("Done\n");
}
/*
 * Recurse until i reaches 500 to build a deep call stack, then run
 * CheckStack() from the innermost frame.
 */
__declspec(noinline) void Test(int i)
{
    if (i == 500)
    {
        CheckStack();
    }
    else
    {
        Test(i + 1);
    }
}
/* Entry point: start the recursion that leads to the stack check. */
int main()
{
    Test(0);
    return 0;
}
produces the following output
Checking stack...
Stack size is: 507
Incorrect count (507 != 257)
Done
when building as cl /Od main.c /link /OUT:main.exe.
Am I using the FramesToSkip parameter incorrectly or why are the counts not equal?
If you are using Windows Server 2003 or Windows XP, the documentation states:
"The sum of the FramesToSkip and FramesToCapture parameters must be less than 63."
Otherwise, as @RbMm says, the API source code contains the following logic:
/* Requests that skip more than 0xfe (254) frames are rejected up front. */
if(FramesToSkip>0xfe)
{
return 0; // Too many frames to skip: return 0 immediately without capturing anything.
}
However, this is not mentioned on MSDN for either CaptureStackBackTrace or RtlCaptureStackBackTrace.
I am not going to post the source code here, but prove it in debugging:
1.Create a sample:
#include <Windows.h>
#include <assert.h>
#include <stdio.h>
/*
 * Debugging aid: issue a single capture that skips 255 frames, so the call
 * can be stepped into and its handling of a large FramesToSkip observed.
 * The returned count is stored but not otherwise used.
 */
__declspec(noinline) void CheckStack(void)
{
    void* frames[USHRT_MAX];
    USHORT captured;

    captured = CaptureStackBackTrace(255, USHRT_MAX, frames, NULL);
}
/*
 * Recurse until i reaches 500 so that the stack is deeper than the number of
 * frames being skipped, then call CheckStack() from the innermost frame.
 */
__declspec(noinline) void Test(int i)
{
    if (i == 500)
    {
        CheckStack();
    }
    else
    {
        Test(i + 1);
    }
}
/* Entry point: build the deep stack and run the single capture. */
int main()
{
    Test(0);
    return 0;
}
2. Step into CaptureStackBackTrace in Disassembly:
You can see that dword ptr[ebp+8](the first parameter of CaptureStackBackTrace pushed in stack) will be compared with 0feh(254). If true, return 0.

How to make thread safe program?

On a 64-bit architecture pc, the next program should return the result 1.350948.
But it is not thread safe and every time I run it gives (obviously) a different result.
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <pthread.h>
/* Number of subdivisions per axis; also the number of worker threads main() starts. */
const unsigned int ndiv = 1000;
/* Shared accumulator that every worker thread adds its partial result to. */
double res = 0;
/* Per-thread argument: the x coordinate of the column that thread integrates. */
struct xval{
double x;
};
// Integrate exp(x^2 + y^2) over the unit circle on the
// first quadrant.
void* sum_function(void*);

/*
 * Thread entry point: integrate exp(x^2 + y^2) along one column of the unit
 * circle's first quadrant and add the contribution to the global `res`.
 *
 *   args - pointer to a struct xval holding the column's x coordinate
 *
 * The column is summed into a thread-private variable; only the final update
 * of the shared accumulator is performed under a mutex, so that concurrent
 * workers cannot lose each other's additions.  Always returns NULL.
 */
void* sum_function(void* args){
    /* One lock shared by every thread that runs this function. */
    static pthread_mutex_t res_lock = PTHREAD_MUTEX_INITIALIZER;

    const double step = 1/(double)ndiv;
    const double x = ((struct xval*)args)->x;
    unsigned int j;
    double y = 0;
    double localres = 0;

    /* Walk up the column in steps of 1/ndiv while (x, y) is inside the circle. */
    for(j = 0; (x*x)+(y*y) < 1; y = (++j)*step){
        localres += exp((x*x)+(y*y));
    }

    /* Global variable: the read-modify-write must not interleave with other threads. */
    pthread_mutex_lock(&res_lock);
    res += (localres/(double)(ndiv*ndiv));
    pthread_mutex_unlock(&res_lock);

    return NULL;
}
/*
 * Start one worker thread per x subdivision in [0, 1), wait for all of them,
 * and print the accumulated integral.
 *
 * Returns 0 on success.  Exits with EXIT_FAILURE if the argument array cannot
 * be allocated, and returns EXIT_FAILURE if a thread could not be created.
 */
int main(void){
    unsigned int i;
    unsigned int started = 0;
    int status = 0;
    pthread_t thr[ndiv];
    struct xval* xvarray;

    if((xvarray = calloc(ndiv, sizeof(struct xval))) == NULL){
        exit(EXIT_FAILURE);
    }

    /* The index bound keeps thr[] and xvarray[] in range even if rounding
     * were to leave x just below 1 at i == ndiv. */
    for(i = 0; i < ndiv; i++){
        double x = i*(1/(double)ndiv);
        if(!(x < 1)){
            break;
        }
        xvarray[i].x = x;
        if(pthread_create(&thr[i], NULL, &sum_function, &xvarray[i]) != 0){
            fprintf(stderr, "pthread_create failed for thread %u\n", i);
            status = EXIT_FAILURE;
            break;
        }
        started++;
    }

    /* Join only the threads that were actually started. */
    for(i = 0; i < started; i++){
        pthread_join(thr[i], NULL);
    }
    free(xvarray);

    if(status != 0){
        return status;
    }
    printf("The integral of exp(x^2 + y^2) over the unit circle on\n"
           "the first quadrant is: %f\n", res);
    return 0;
}
How can it be thread safe?
NOTE: I know that 1000 threads is not a good way to solve this problem, but I really really want to know how to write thread-safe c programs.
Compile the above program with
gcc ./integral0.c -lpthread -lm -o integral
/* Acquire the mutex before touching the shared data. */
pthread_mutex_lock(&my_mutex);
// code to make thread safe
/* Release the mutex as soon as the shared data is no longer being accessed. */
pthread_mutex_unlock(&my_mutex);
Declare my_mutex as a global variable and initialize it before first use, either statically with pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; or at run time with pthread_mutex_init(&my_mutex, NULL);. Also don't forget to #include <pthread.h> and to link your program with -lpthread when compiling.
The question (in a comment in the code):
// mutex? futex? lock? semaphore? other?
Answer: mutex.
See pthread_mutex_init, pthread_mutex_lock, and pthread_mutex_unlock.

Getting back trace for ARC platform from signal handler context

I want to catch SIGSEGV and print the back trace in the logs before my program exits. This is to analyze the crash at a later point of time. I am working on a software which runs on multiple platforms. On x86 platform I can do this easily by using glibc backtrace() function. But the same is not available for MIPS and ARC platforms. I am able to print the back trace for MIPS architecture as explained here
I want to do something similar for ARC platform as well. It would be great help if someone can give some data points on where I can get similar details.
Edit:
After some research I figured out that in ARC platform for a function call, stack is not allocated at once but allocated in parts. (Correct me if I am wrong. I went through the object dump and figured this out.) So I feel it will be hard to do binary code parsing in this case as opposed to MIPS.
Another approach would be to write some inline assembly in C and get stack pointer, frame pointer and branch link register content (blink) and then try to unwind the stack using stack & frame size and print value of blink in each frame. But I am not able to find the frame size.
Here is a sample code to get FP,SP,BLINK.
/*
 * Experimental probe for ARC: store sp, blink and fp into this function's
 * own stack slots with inline assembly, then print them.
 *
 * NOTE(review): the three locals are never assigned in C.  The code relies on
 * them living at [sp,4], [sp,8] and [sp,12], so that the `st` instructions
 * fill them in - presumably true only for one particular compiler and
 * optimization level; verify on the target before trusting the output.
 *
 *   func2_arg - unused
 *
 * Always returns 0.
 */
int func2(int func2_arg)
{
    unsigned long *stack2_addr;
    unsigned long *frame2_addr;
    unsigned long *blink2_addr;

    (void) func2_arg;

    printf("\nFunc : %s\n",__FUNCTION__);

    /* Pointers are printed with %p; %d with a pointer argument is undefined. */
    __asm__ __volatile__ ("st sp,[sp,4]");
    printf("Stack pointer: %p\n",(void *)stack2_addr);

    __asm__ __volatile__ ("st blink,[sp,12]");
    printf("Blink: %p \n",(void *)blink2_addr);

    /* The second value is the unsigned long stored at the frame pointer. */
    __asm__ __volatile__ ("st fp,[sp,8]");
    printf("Frame pointer2: %p, %lu\n",(void *)frame2_addr,*frame2_addr);

    return 0;
}
Yes this is not good coding! I have made many assumptions. But for me it is fine as far as it is working on my board. :)
Any help would be greatly appreciated. Here is another reference on ARC gcc.
Finally found some open source code (Apache license) which does what was required. Here is the code which works.
Sorry about the big code post.
/*
* Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "backtrace.h"
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include "compiler.h"
#include "vlog.h"
VLOG_DEFINE_THIS_MODULE(backtrace);
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
/*
 * Fill `b` with the current call stack using backtrace(): up to
 * BACKTRACE_MAX_FRAMES addresses are captured, and each one is stored in
 * b->frames as a uintptr_t.  b->n_frames receives backtrace()'s return value.
 */
void
backtrace_capture(struct backtrace *b)
{
    void *addrs[BACKTRACE_MAX_FRAMES];
    int i = 0;

    b->n_frames = backtrace(addrs, BACKTRACE_MAX_FRAMES);
    while (i < b->n_frames) {
        b->frames[i] = (uintptr_t) addrs[i];
        i++;
    }
}
#elif __GNUC__
/*
 * Look up the end address of the "[stack]" mapping in /proc/self/maps.
 *
 * Returns that address, or (uintptr_t) -1 after logging a warning if the file
 * cannot be opened or no parsable "[stack]" line is found.
 */
static uintptr_t
get_max_stack(void)
{
    static const char file_name[] = "/proc/self/maps";
    char line[1024];
    int line_number = 0;
    FILE *f = fopen(file_name, "r");

    if (f == NULL) {
        VLOG_WARN("opening %s failed: %s", file_name, strerror(errno));
        return -1;
    }

    while (fgets(line, sizeof line, f)) {
        uintptr_t end;

        line_number++;
        if (!strstr(line, "[stack]")) {
            continue;
        }
        /* The line starts with "start-end"; only the end address is kept. */
        if (sscanf(line, "%*x-%"SCNxPTR, &end) == 1) {
            fclose(f);
            return end;
        }
        VLOG_WARN("%s:%d: parse error", file_name, line_number);
    }

    fclose(f);
    VLOG_WARN("%s: no stack found", file_name);
    return -1;
}
/*
 * Upper bound of the stack, as reported by get_max_stack().  The value is
 * looked up on first use and cached for as long as it is nonzero.
 */
static uintptr_t
stack_high(void)
{
    static uintptr_t cached;

    if (cached == 0) {
        cached = get_max_stack();
    }
    return cached;
}
/*
 * Lower bound of the in-use stack: the address of a local variable of this
 * function, returned as an integer.
 */
static uintptr_t
stack_low(void)
{
    uintptr_t marker;

    marker = (uintptr_t) &marker;
    return marker;
}
/*
 * Report whether `p` lies in the range [stack_low(), stack_high()).
 * stack_high() is consulted only when `p` is not below the lower bound.
 */
static bool
in_stack(void *p)
{
    uintptr_t address = (uintptr_t) p;

    if (address < stack_low()) {
        return false;
    }
    return address < stack_high();
}
/*
 * Fill `backtrace` by following the chain of frame pointers, starting from
 * the caller's frame (__builtin_frame_address(1)).
 *
 * For each frame, slot 0 is followed as the link to the next frame and slot 1
 * is recorded as the frame's address entry.  The walk stops at a NULL frame,
 * a frame outside the stack bounds, a frame whose link is NULL, or once
 * BACKTRACE_MAX_FRAMES entries have been stored.
 */
void
backtrace_capture(struct backtrace *backtrace)
{
    size_t count = 0;
    void **fp = __builtin_frame_address(1);

    while (fp != NULL && in_stack(fp) && fp[0] != NULL
           && count < BACKTRACE_MAX_FRAMES) {
        backtrace->frames[count] = (uintptr_t) fp[1];
        count++;
        fp = fp[0];
    }
    backtrace->n_frames = count;
}
#else /* !HAVE_BACKTRACE && !__GNUC__ */
/*
 * Fallback for builds with neither backtrace() nor GCC builtins: no frames
 * can be captured, so the result is marked as empty.
 */
void backtrace_capture(struct backtrace *backtrace)
{
    backtrace->n_frames = 0;
}
#endif
Hope this will be useful for someone else also !

Resources