I am trying to run and debug a C program on a dsPIC30f3011 microcontroller. When I run my code in MPLAB, the code always tends to stop at this ISR and I am stuck with absolutely no output for any variables, with my code not even executing. It seems to be some kind of "trap" program that I assume is for catching simple mistakes (i.e. oscillator failures, etc.) I am using MPLabIDE v8.5, with an MPLab ICD3 in debug mode. It's worth mentioning that MPLAB shows that I am connected to both the target(dsPIC) and the ICD3. Can someone please give me a reason as to why this problem is occurring?
Here is the ISR:
/* Address-error trap handler: acknowledge the trap, then never return. */
void _ISR __attribute__((no_auto_psv))_AddressError(void)
{
    INTCON1bits.ADDRERR = 0;    /* clear the address-error trap flag */

    for (;;) {
        /* spin forever; execution stays inside this handler */
    }
}
Here is my code with initializations first, then PID use, then the DSP functions,
then the actual DSP header file where the syntax/algorithm is derived. There is also some sort of problem where I define DutyCycle.
///////////////////////////////Initializations/////////////////////////////////////////////
#include "dsp.h" //see bottom of program
tPID SPR4535_PID; // The single PID data structure used by the PID routines below
/* The SPR4535_PID data structure contains a pointer to derived coefficients in X-space and */
/* pointer to controller state (history) samples in Y-space. So declare variables for the */
/* derived coefficients and the controller history samples */
/* NOTE(review): the space() attributes ask the toolchain to place each buffer in a specific */
/* data-memory region; confirm the linker script actually honours both placements. */
fractional abcCoefficient[3] __attribute__ ((space(xmemory))); // Derived A, B, C coefficients, placed in X memory
fractional controlHistory[3] __attribute__ ((space(ymemory))); // Three controller history samples, placed in Y memory
/* The abcCoefficients referenced by the SPR4535_PID data structure */
/* are derived from the gain coefficients, Kp, Ki and Kd */
/* So, declare Kp, Ki and Kd in an array */
fractional kCoeffs[] = {0,0,0}; // Filled with KP, KI, KD by INIT_PID() before PIDCoeffCalc() is called
//////////////////////////////////PID variable use///////////////////////////////
/* Run one speed-control step: update the PID reference, derive the measured */
/* speed from timer3avg, run the PID and clamp the resulting duty cycle. */
void ControlSpeed(void)
{
LimitSlew();
PID_CHANGE_SPEED(SpeedCMD);
// Guard the division: a non-positive timer3avg is reported as zero speed
if (timer3avg > 0)
ActualSpeed = SPEEDMULT/timer3avg;
else
ActualSpeed = 0;
// Upper bound used both as the scale factor and as the clamp limit below
max=2*(PTPER+1);
// Convert the fractional PID output to float and scale it to the 0..max range
DutyCycle=Fract2Float(PID_COMPUTE(ActualSpeed))*max;
// Clamp the duty cycle to the range [0, max], where max = 2*(PTPER+1)
// NOTE(review): if DutyCycle is declared as an unsigned type the "< 0" branch
// can never be taken and a negative PID output would wrap instead - confirm its type
if(DutyCycle>max)
DutyCycle=max;
else if (DutyCycle<0)
DutyCycle=0;
}
//////////////////////////////////PID functions//////////////////////////////////
void INIT_PID(int DESIRED_SPEED)
{
SPR4535_PID.abcCoefficients = &abcCoefficient[0]; //Set up pointer to derived coefficients
SPR4535_PID.controlHistory = &controlHistory[0]; //Set up pointer to controller history samples
PIDInit(&SPR4535_PID); //Clear the controller history and the controller output
kCoeffs[0] = KP; // Sets the K[0] coefficient to the KP
kCoeffs[1] = KI; // Sets the K[1] coefficient to the KI
kCoeffs[2] = KD; // Sets the K[2] coefficient to the Kd
PIDCoeffCalc(&kCoeffs[0], &SPR4535_PID); //Derive the a,b, & c coefficients from the Kp, Ki & Kd
SPR4535_PID.controlReference = DESIRED_SPEED; //Set the Reference Input for your controller
}
int PID_COMPUTE(int MEASURED_OUTPUT)
{
SPR4535_PID.measuredOutput = MEASURED_OUTPUT; // Records the measured output
PID(&SPR4535_PID);
return SPR4535_PID.controlOutput; // Computes the control output
}
void PID_CHANGE_SPEED (int NEW_SPEED)
{
SPR4535_PID.controlReference = NEW_SPEED; // Changes the control reference to change the desired speed
}
/////////////////////////////////////dsp.h/////////////////////////////////////////////////
/* PID controller state: two buffer pointers plus the output, measured and reference samples. */
typedef struct {
fractional* abcCoefficients; /* Pointer to A, B & C coefficients located in X-space */
/* These coefficients are derived from */
/* the PID gain values - Kp, Ki and Kd */
fractional* controlHistory; /* Pointer to 3 delay-line samples located in Y-space */
/* with the first sample being the most recent */
fractional controlOutput; /* PID Controller Output */
fractional measuredOutput; /* Measured Output sample */
fractional controlReference; /* Reference Input sample */
} tPID;
/*...........................................................................*/
/* Prototypes of the three PID library routines; their implementations are not part of this header. */
extern void PIDCoeffCalc( /* Derive A, B and C coefficients using PID gain values-Kp, Ki & Kd*/
fractional* kCoeffs, /* pointer to array containing Kp, Ki & Kd in sequence */
tPID* controller /* pointer to PID data structure */
);
/*...........................................................................*/
extern void PIDInit ( /* Clear the PID state variables and output sample*/
tPID* controller /* pointer to PID data structure */
);
/*...........................................................................*/
/* NOTE(review): PID() is declared to return a fractional*, which PID_COMPUTE() above ignores; */
/* it reads controller->controlOutput instead. Confirm what the returned pointer refers to. */
extern fractional* PID ( /* PID Controller Function */
tPID* controller /* Pointer to PID controller data structure */
);
The dsPIC traps don't offer much information free of charge, so I tend to augment the ISRs with a little assembly language pre-prologue. (Note that the Stack Error trap is a little ropey, as it uses RCALL and RETURN instructions when the stack is already out of order.)
/**
* \file trap.s
* \brief Used to provide a little more information during development.
*
* The trapPreprologue function is called on entry to each of the routines
* defined in traps.c. It looks up the stack to find the return address (PC)
* that was pushed when the trap occurred and stores it in the __errAddress
* memory location (presumably the symbol that traps.c sees as _errAddress).
* It also copies the INTCON1 SFR into __intCon1.
*
* NOTE(review): the routine clears INTCON2<15> and never sets it again, and
* the stack offsets used below depend on how the device stacks the PC/SR on a
* trap plus the extra return address pushed by the rcall that reaches this
* routine. Both are device-family specific - confirm against the data sheet
* of the actual target before relying on the captured address.
*/
.global __errAddress
.global __intCon1
.global _trapPreprologue
.section .bss
; 4 bytes: the two words of the captured return address (written with mov.d below)
__errAddress: .space 4
; 2 bytes: snapshot of INTCON1 taken on trap entry
__intCon1: .space 2
.section .text
_trapPreprologue:
; Disable maskable interrupts and save primary regs to shadow regs
bclr INTCON2, #15 ;global interrupt disable (NOTE(review): bit meaning differs between device families - confirm)
push.s ;Save working registers via the shadow set so w0-w2 can be used as scratch (restored by pop.s below)
; Retrieve the ISR return address from the stack into w0:w1
sub w15, #4, w2 ;w2 = SP - 4: skips the return address pushed by the rcall into this routine
mov [--w2], w0 ;pre-decrement, then read the word at SP-6 into w0 (treated as the LSW)
bclr w0, #0x0 ;mask out SFA bit (w0<0>)
mov [--w2], w1 ;pre-decrement, then read the word at SP-8 into w1 (treated as the MSW)
bclr w1, #0x7 ;mask out IPL<3> bit (w1<7>)
ze w1, w1 ;mask out SR<7:0> bits (w1<15..8>)
; NOTE(review): verify on the target that the word at SP-6 really is the PC
; LSW and the word at SP-8 the SR/PC MSW; if the device stacks them the other
; way round, the two halves of __errAddress are swapped.
; Save it
mov #__errAddress, w2 ;Move address of __errAddress into w2
mov.d w0, [w2] ;store w0 at __errAddress and w1 at __errAddress+2
; Copy the content of the INTCON1 SFR into memory
mov #__intCon1, w2 ;Move address of __intCon1 into w2
mov INTCON1, WREG ;Read the trap flags into w0 (WREG)
mov w0, [w2] ;save the trap flags to __intCon1
; Return to the 'C' handler
pop.s ;Restore the registers saved by push.s
return
Then I keep all the trap ISRs in a single traps.c file that uses the pre-prologue in trap.s. Note that the actual traps may be different for your microcontroller - check the data sheet to see which are implemented.
/**
* \file traps.c
* \brief Micro-controller exception interrupt vectors.
*/
#include <stdint.h>
#include "traps.h" // Internal interface to the micro trap handling.
/* Access to immediate call stack. Implementation in trap.s */
extern volatile unsigned long _errAddress;
extern volatile unsigned int _intCon1;
extern void trapPreprologue(void);
/* Trap information, set by the traps that use them. */
static unsigned int _intCon2;
static unsigned int _intCon3;
static unsigned int _intCon4;
/* Protected functions exposed by traps.h */
/* Reset every recorded piece of trap diagnostic state to zero. */
void trapsInitialise(void)
{
    /* Values written by the assembly pre-prologue. */
    _errAddress = 0;
    _intCon1    = 0;

    /* Values written by the individual C trap handlers. */
    _intCon4 = 0;
    _intCon3 = 0;
    _intCon2 = 0;
}
/* Trap Handling */
// The trap routines call the _trapPreprologue assembly routine in trap.s
// to obtain the value of the PC when the trap occurred and store it in
// the _errAddress variable. They reset the interrupt source in the CPU's
// INTCON SFR and invoke the (#defined) vThrow macro to report the fault.
//
// Every handler carries the same attribute: preprologue("rcall _trapPreprologue")
// requests that the call into the assembly routine is emitted ahead of the
// handler's own code, so _errAddress and _intCon1 are already filled in when
// the C body runs. vThrow is defined elsewhere; its behaviour is not visible here.
// NOTE(review): which of these traps and INTCONx registers exist depends on
// the device - confirm against the data sheet before building for a new part.

// Oscillator failure trap: clear OSCFAIL, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _OscillatorFail(void)
{
INTCON1bits.OSCFAIL = 0; /* Clear the trap flag */
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// Stack error trap: clear STKERR, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _StackError(void)
{
INTCON1bits.STKERR = 0; /* Clear the trap flag */
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// Address error trap: clear ADDRERR, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _AddressError(void)
{
INTCON1bits.ADDRERR = 0; /* Clear the trap flag */
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// Math error trap: clear MATHERR, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _MathError(void)
{
INTCON1bits.MATHERR = 0; /* Clear the trap flag */
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// DMA controller error trap: clear DMACERR, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _DMACError(void)
{
INTCON1bits.DMACERR = 0; /* Clear the trap flag */
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// Hard trap: snapshot INTCON4 and INTCON2 before clearing them, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _HardTrapError(void)
{
_intCon4 = INTCON4;
INTCON4 = 0; // Clear the hard trap register
_intCon2 = INTCON2;
INTCON2bits.SWTRAP = 0; // Make sure the software hard trap bit is clear
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
// Soft trap: snapshot INTCON3 before clearing it, then report.
void __attribute__((interrupt(preprologue("rcall _trapPreprologue")),no_auto_psv)) _SoftTrapError(void)
{
_intCon3 = INTCON3;
INTCON3 = 0; // Clear the soft trap register
vThrow(_intCon1, _intCon2, _intCon3, _intCon4, _errAddress);
}
Implementation of the vThrow macro is up to you. However, it should not use the stack, as this may be unavailable (so no puts() debug calls!) During development, it would be reasonable to use a simple endless loop with a NOP statement in it that you can breakpoint on.
(In a production build, my vThrow macro logs the parameters into a reserved area of RAM that is excluded from being zeroed at start-up by the linker script, and resets the microcontroller. During start-up the program inspects the reserved area and if it is non-zero records the error event for diagnostics.)
Once you get a trap, inspecting the content of the _errAddress variable will give you the return address for the ISR, which is the address immediately following the instruction that generated the interrupt. You can then inspect your MAP files to find the routine and, if you're really keen, inspect the disassembly to find the specific instruction. After that, debugging it is up to you.
As suggested in the comments, the while(1) statement is where your code is getting hung. Note however, that your code is executing - you're just in an infinite loop. That's also why you can't view your variables or current program counter. Generally when you're attached to a ucontroller via PC host, you can't view state information while the ucontroller is executing. Everything is running too fast, even on a slow one, to constantly update your screen.
To try to identify the cause, you can set a breakpoint in the ISR and reset the controller. When the breakpoint is hit, execution will halt, and you may be able to investigate your stack frames to see the last line of code executed before the ISR was triggered. This is not guaranteed though - depending on how your particular ucontroller handles interrupts, the call stack may not be continuous between normal program execution and the interrupt context.
If that doesn't work, set a breakpoint in your code before the ISR is invoked, and step through your code until it is. The last line of code you executed before the ISR will be the cause. Keep in mind, this may take some time, especially if the offending line is in the loop and only trips the ISR after a certain number of iterations.
EDIT
After posting this answer, I noticed your last comment about the linkscript warning. This is a perfect example of why you should, with very few exceptions, work just as hard to resolve warnings as you do to resolve compiler errors. Especially if you don't understand what the warning means and what caused it.
A PID algorithm involves multiplication. On a dsPIC, this is done via the built-in hardware multiplier. This multiplier has one register which must point to X memory space and another which must point to Y memory space. The DSP core then multiplies these two, and the result can be found in the accumulator (there are two of them).
An address error trap will be triggered if an X memory address is loaded into the Y memory register, or vice versa. You can check this by single-stepping through the code in the disassembly view.
This is not the only instance the trap is triggered. There are also silicon bugs that can cause this, check the errata.
Related
I'm using MikroC for PIC v7.2, to program a PIC18f67k40.
Within functii.h, I have the following variable declaration:
extern volatile unsigned char byte_count;
Within main.c, the following code:
#include <functii.h>
// ...
volatile unsigned char byte_count = 0;
// ...
// Interrupt handler. When RC1IF_bit is set (presumably the UART1 receive
// flag - confirm), reads one byte, appends it to uart_string and counts it.
void interrupt () {
if (RC1IF_bit) {
uart_rx = Uart1_read();
// NOTE(review): byte_count is not range-checked before it is used as an
// index - confirm uart_string cannot be overrun (byte_count is an unsigned char)
uart_string[byte_count] = uart_rx;
byte_count++;
}
// ...
}
Then, within command.c, I have the following code:
#include <functii.h>
// Sends the current value of byte_count (declared extern volatile in functii.h) over UART1.
void how_many_bytes () {
// ...
// byte_count is read once here; an interrupt may change it before or after this read
uart1_write(byte_count);
// ...
}
In main.c, I process data coming through the UART, using an interrupt. Once the end of transmission character is received, I call how_many_bytes(), which sends back the length of the message that was received (plus the data bytes themselves, the code for which I didn't include here, but those are all OK!!).
The problem is that on the uart1_write() call, byte_count is always 0, instead of having been incremented in the interrupt sequence.
Probably you need some synchronization between the interrupt handler and the main processing.
If you do something like this
if(byte_count != 0) {
uart1_write(byte_count);
byte_count = 0;
}
the interrupt can occur anywhere, for example
between if(byte_count != 0)and uart1_write(byte_count); or
during the processing of uart1_write(byte_count); which uses a copy of the old value while the value gets changed or
between uart1_write(byte_count); and byte_count = 0;.
With the code above case 1 is no problem but 2 and 3 are. You would lose all characters received after reading byte_count for the function call.
Maybe you can disable/enable interrupts at certain points.
A better solution might be to not reset byte_count outside of interrupt() but instead implement a ring buffer with separate read and write index. The read index would be modified by how_many_bytes() (or uart1_write()) only and the write index by interrupt() only.
I have a 64 bit integer variable on a 32 bit Cortex-M3 ARM controller (STM32L1), which can be modified asynchronously by an interrupt handler.
volatile uint64_t v;
/* Interrupt handler: adds `something` to the shared 64-bit variable v. */
void some_interrupt_handler() {
    const uint64_t updated = v + something;   /* one read of v */

    v = updated;                               /* one write of v */
}
Obviously, I need a way to access it in a way that prevents getting inconsistent, halfway updated values.
Here is the first attempt
/*
 * Read a 64-bit variable that an interrupt handler may modify, with
 * interrupts masked for the duration of the read so both halves are consistent.
 *
 * x: address of the variable to read.
 * Returns the value of *x.
 *
 * The interrupt mask is left exactly as the caller had it: interrupts are
 * only re-enabled if they were enabled on entry. The original version called
 * __enable_irq() unconditionally, which silently unmasked interrupts when the
 * function was used inside a critical section or a handler running with
 * interrupts already disabled.
 */
static inline uint64_t read_volatile_uint64(volatile uint64_t *x) {
    uint64_t y;
    const uint32_t primask = __get_PRIMASK();   /* bit 0 set => already masked */

    __disable_irq();
    y = *x;
    if ((primask & 1U) == 0U) {
        __enable_irq();
    }
    return y;
}
The CMSIS inline functions __disable_irq() and __enable_irq() have an unfortunate side effect, forcing a memory barrier on the compiler, so I've tried to come up with something more fine-grained
/*
 * Read *x with a single LDRD, bracketed by cpsid/cpsie so the load cannot be
 * interrupted. The asm statement declares only a register output and a
 * memory input, so it does not act as a general compiler memory barrier.
 *
 * NOTE(review): "cpsie i" re-enables interrupts unconditionally, even if the
 * caller had them disabled on entry.
 * NOTE(review): the asm is not marked volatile, so the compiler may treat it
 * as a pure function of its inputs - confirm repeated reads are not merged.
 * NOTE(review): the later LDRD-only variant of this function uses "=&r" for
 * the output (erratum workaround); this one uses "=r" - confirm which is intended.
 */
static inline uint64_t read_volatile_uint64(volatile uint64_t *x) {
uint64_t y;
asm ( "cpsid i\n"
"ldrd %[value], %[addr]\n"
"cpsie i\n"
: [value]"=r"(y) : [addr]"m"(*x));
return y;
}
It still disables interrupts, which is not desirable, so I'm wondering if there's a way doing it without resorting to cpsid. The Definitive Guide to
ARM Cortex-M3 and Cortex-M4 Processors, Third Edition by Joseph Yiu says
If an interrupt request arrives when the processor is executing a
multiple cycle instruction, such as an integer divide, the instruction
could be abandoned and restarted after the interrupt handler
completes. This behavior also applies to load double-word (LDRD) and
store double-word (STRD) instructions.
Does it mean that I'll be fine by simply writing this?
/*
 * Read *x with a single LDRD and no interrupt masking; the instruction is
 * relied upon to be restarted rather than resumed if an interrupt arrives
 * part-way through. The early-clobber output ("=&r") keeps the destination
 * registers distinct from the address operand's registers.
 *
 * NOTE(review): the asm is not marked volatile - confirm the compiler cannot
 * merge or hoist repeated reads of the same location.
 */
static inline uint64_t read_volatile_uint64(volatile uint64_t *x) {
uint64_t y;
asm ( "ldrd %[value], %[addr]\n"
: [value]"=&r"(y) : [addr]"m"(*x));
return y;
}
(Using "=&r" to work around ARM errata 602117)
Is there some library or builtin function that does the same portably? I've tried atomic_load() in stdatomic.h, but it fails with undefined reference to '__atomic_load_8'.
Yes, using a simple ldrd is safe in this application since it will be restarted (not resumed) if interrupted, hence it will appear atomic from the interrupt handler's point of view.
This holds more generally for all load instructions except those that are exception-continuable, which are a very restricted subset:
only ldm, pop, vldm, and vpop can be continuable
an instruction inside an it-block is never continuable
an ldm/pop whose first loaded register is also the base register (e.g. ldm r0, { r0, r1 }) is never continuable
This gives plenty of options for atomically reading a multi-word variable that's modified by an interrupt handler on the same core. If the data you wish to read is not a contiguous array of words then you can do something like:
1: ldrex %[val0], [%[ptr]] // can also be byte/halfword
... more loads here ...
strex %[retry], %[val0], [%[ptr]]
cbz %[retry], 2f
b 1b
2:
It doesn't really matter which word (or byte/halfword) you use for the ldrex/strex since an exception will perform an implicit clrex.
The other direction, writing a variable that's read by an interrupt handler is a lot harder. I'm not 100% sure but I think the only stores that are guaranteed to appear atomic to an interrupt handler are those that are "single-copy atomic", i.e. single byte, aligned halfword, and aligned word. Anything bigger would require disabling interrupts or using some clever lock-free structure.
Atomicity is not guaranteed on LDRD according to the ARMv7m reference manual. (A3.5.1)
The only ARMv7-M explicit accesses made by the ARM processor which exhibit single-copy atomicity are:
• All byte transactions
• All halfword transactions to 16-bit aligned locations
• All word transactions to 32-bit aligned locations
LDM, LDC, LDRD, STM, STC, STRD, PUSH and POP operations are seen to be a sequence of 32-bit
transactions aligned to 32 bits. Each of these 32-bit transactions are guaranteed to exhibit single-copy
atomicity. Sub-sequences of two or more 32-bit transactions from the sequence also do not exhibit
single-copy atomicity
What you can do is use a byte to indicate to the ISR you're reading it.
// Reader side (pseudo-code): keep re-reading until no ISR ran during the copy.
// The do-block was missing its closing brace before the while condition.
non_isr(){
do{
flag = 1              // announce that a read is in progress
foo = doubleword      // possibly torn copy
}while(flag > 1)      // the ISR bumped flag during the copy: try again
flag = 0              // read finished
}
// Writer side (pseudo-code): if a read is in progress (flag == 1), bump the
// flag so the reader's loop condition (flag > 1) makes it retry.
// NOTE(review): "foo" here presumably stands for the ISR's new value, not the
// reader's local copy of the same name - confirm.
isr(){
if(flag == 1)
flag++;
doubleword = foo
}
Source (login required):
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html
Login not required:
http://www.telecom.uff.br/~marcos/uP/ARMv7_Ref.pdf
I was also trying to use a 64-bit (2 x 32-bit) system_tick, but on an STM32L4xx (ARM Cortex-M4). I found that when I used just "volatile uint64_t system_tick", the compiler emitted an LDRD instruction, which may have been enough, since being interrupted after reading the first word is supposed to cause both words to be read again.
I asked the tech at IAR software support and he responded that I should use C11 atomics;
#include "stdatomic.h"
#ifdef __STDC_NO_ATOMICS__
static_assert(__STDC_NO_ATOMICS__ != 1);
#endif
volatile atomic_uint_fast64_t system_tick;
/**
* \brief Advance the 64-bit system_tick counter by one (atomic read-modify-write)
* \retval none
*/
void HAL_IncTick(void)
{
    atomic_fetch_add(&system_tick, 1);
}
/**
* \brief Read 64-bit system_tick
* \retval system_tick
*/
uint64_t HAL_GetSystemTick(void)
{
return system_tick;
}
/**
* \brief Read 32 least significant bits of system_tick
* \retval (uint64_t) system_tick
*/
uint32_t HAL_GetTick(void)
{
return (uint32_t)system_tick;
}
But what I found was a colossal amount of code was added to make the read "atomic".
Way back in the day of 8-bit micro-controllers, the trick was to read the high byte, read the low byte, then read the high byte until the high byte was the same twice - proving that there was no rollover created by the ISR. So if you are against disabling IRQ, reading system_tick, then enabling IRQ, try this trick:
/**
* \brief Read 64-bit system_tick without disabling interrupts
* \retval system_tick
*
* The counter is read repeatedly until two consecutive reads return the same
* 64-bit value. A read that was torn by the tick interrupt (one half from
* before the increment, one half from after) can never equal the following
* read, so it is discarded.
*
* The earlier form re-checked only the upper 32 bits. That is only safe if
* the compiler reads the high word before the low word; with the opposite
* order a torn value (new high word, stale 0xFFFFFFFF low word) passed the
* check. Comparing the whole value works for either access order.
*
* Assumes system_tick only ever increments and that the tick period is much
* longer than two reads, so the loop ends after at most a few iterations.
*/
uint64_t HAL_GetSystemTick(void)
{
    uint64_t previous;
    uint64_t current = system_tick;

    do {
        previous = current;
        current = system_tick;
    } while (current != previous);

    return current;
}
The idea is that if the most significant word does not roll over, then the whole 64-bit system_tick must be valid. If HAL_IncTick() did anything more than a simple increment, this assertion would not be possible.
I have to change the designated section of function_b so that it changes the stack in such a way that the program prints:
Executing function_a
Executing function_b
Finished!
At this point it also prints Executed function_b in between Executing function_b and Finished!.
I have the following code and I have to fill something in, in the part where it says // ... insert code here
#include <stdio.h>
/* Exercise template: only the marked section inside function_b may be changed. */
/* The goal is for the program to skip the "Executed function_b" line printed by function_a. */
void function_b(void){
char buffer[4]; /* 4-byte local; function_b's only local variable */
// ... insert code here
fprintf(stdout, "Executing function_b\n");
}
/* Prints a line, calls function_b, then prints the line the exercise wants skipped. */
void function_a(void) {
int beacon = 0x0b1c2d3; /* recognisable constant stored in function_a's frame; not otherwise used */
fprintf(stdout, "Executing function_a\n");
function_b();
fprintf(stdout, "Executed function_b\n");
}
/* Calls function_a and prints the final line. */
int main(void) {
function_a();
fprintf(stdout, "Finished!\n");
return 0;
}
I am using Ubuntu Linux with the gcc compiler. I compile the program with the following options: -g -fno-stack-protector -fno-omit-frame-pointer. I am using an intel processor.
Here is a solution, not exactly stable across environments, but works for me on x86_64 processor on Windows/MinGW64.
It may not work for you out of the box, but still, you might want to use a similar approach.
/* Solution sketch: locate both frames by scanning memory for known values, */
/* then move the stack pointer so that function_b returns as if it were function_a. */
/* Depends on the compiler's stack layout; not portable. */
void function_b(void) {
char buffer[4];
buffer[0] = 0xa1; // part 1: write a signature into buffer; read as a little-endian int it is 0x04c3b2a1
buffer[1] = 0xb2;
buffer[2] = 0xc3;
buffer[3] = 0x04;
register int * rsp asm ("rsp"); // part 2: variables bound directly to registers, so they occupy no stack memory
register size_t r10 asm ("r10");
r10 = 0;
while (*rsp != 0x04c3b2a1) {rsp++; r10++;} // part 3: walk up in 4-byte steps to buffer, counting the steps in r10
while (*rsp != 0x00b1c2d3) rsp++; // part 4: keep walking until function_a's beacon value is found
rsp -= r10; // part 5: step back by the offset counted in part 3
rsp = (int *) ((size_t) rsp & ~0xF); // part 6: clear the low four bits to restore 16-byte alignment
fprintf(stdout, "Executing function_b\n");
}
The trick is that each of function_a and function_b have only one local variable, and we can find the address of that variable just by searching around in the memory.
First, we put a signature in the buffer, let it be the 4-byte integer 0x04c3b2a1 (remember that x86_64 is little-endian).
After that, we declare two variables to represent the registers: rsp is the stack pointer, and r10 is just some unused register.
This allows to not use asm statements later in the code, while still being able to use the registers directly.
It is important that the variables don't actually take stack memory, they are references to processor registers themselves.
After that, we move the stack pointer in 4-byte increments (since the size of int is 4 bytes) until we get to the buffer. We have to remember the offset from the stack pointer to the first variable here, and we use r10 to store it.
Next, we want to know how far in the stack are the instances of function_b and function_a. A good approximation is how far are buffer and beacon, so we now search for beacon.
After that, we have to push back from beacon, the first variable of function_a, to the start of instance of the whole function_a on the stack.
That we do by subtracting the value stored in r10.
Finally, here comes a weirder bit.
At least on my configuration, the stack happens to be 16-byte aligned, and while the buffer array is aligned to the left of a 16-byte block, the beacon variable is aligned to the right of such block.
Or is it something with a similar effect and different explanation?..
Anyway, so we just clear the last four bits of the stack pointer to make it 16-byte aligned again.
The 32-bit GCC doesn't align anything for me, so you might want to skip or alter this line.
When working on a solution, I found the following macro useful:
/*
 * show_sp(): when built with -DDEBUG, prints the current stack pointer as a
 * 16-digit hexadecimal number; otherwise expands to an empty statement.
 * Use it as a statement: show_sp();
 *
 * The pointer is converted to an integer type that matches the %llX
 * conversion (passing a pointer to %X is a format/argument mismatch), and
 * the macro no longer ends in ';' so it stays a single statement inside
 * if/else without braces.
 */
#ifdef DEBUG
#define show_sp() \
    do { \
        register void * rsp asm ("rsp"); \
        fprintf(stdout, "stack pointer is %016llX\n", \
                (unsigned long long) (size_t) rsp); \
    } while (0)
#else
#define show_sp() do{}while(0)
#endif
After this, when you insert a show_sp(); in your code and compile with -DDEBUG, it prints what is the value of stack pointer at the respective moment.
When compiling without -DDEBUG, the macro just compiles to an empty statement.
Of course, other variables and registers can be printed in a similar way.
ok, let assume that epilogue (i.e code at } line) of function_a and for function_b is the same
despite functions A and B not symmetric, we can assume this because it have the same signature (no parameters, no return value), same calling conventions and same size of local variables (4 byte - int beacon = 0x0b1c2d3 vs char buffer[4];) and with optimization - both must be dropped because unused. but we must not use additional local variables in function_b for not break this assumption. most problematic point here - what is function_A or function_B will be use nonvolatile registers (and as result save it in prologue and restore in epilogue) - but however look like here no place for this.
So my code below is based on this assumption - epilogueA == epilogueB (in fact, the solution by @Gassa is also based on it).
It also needs to be stated very clearly that function_a and function_b must not be inlined. This is very important - without it, no solution is possible. So I allowed myself to add a noinline attribute to function_a and function_b. Note that this is not a code change but an added attribute, which the author of the task implicitly assumes but did not state clearly. I don't know how to mark a function as noinline in GCC, but in CL __declspec(noinline) is used for this.
next code I write for CL compiler where exist next intrinsic function
void * _AddressOfReturnAddress();
but I think that GCC also must have the analog of this function. also I use
void* _ReturnAddress();
but however really _ReturnAddress() == *(void**)_AddressOfReturnAddress() and we can use _AddressOfReturnAddress() only. simply using _ReturnAddress() make source (but not binary - it equal) code smaller and more readable.
and next code is work for both x86 and x64. and this code work (tested) with any optimization.
despite I use 2 global variables - code is thread safe - really we can call main from multiple threads in concurrent, call it multiple time - but all will be worked correct (only of course how I say at begin if epilogueA == epilogueB)
hope comments in code enough self explained
// Makes the call from function_a return straight into function_a's caller,
// skipping the rest of function_a. Relies on function_a and function_b
// having identical epilogues (epilogueA == epilogueB).
__declspec(noinline) void function_b(void){
char buffer[4];
buffer[0] = 0;
// IPa: return address of the call made from function_a.
// IPb: return address of the recursive call below.
static void *IPa, *IPb;
// Record IPa once: only stored while IPa is still 0 (the first call)
_InterlockedCompareExchangePointer(&IPa, _ReturnAddress(), 0);
if (_ReturnAddress() == IPa)
{
// Our return address is IPa. Call ourselves once so that the inner
// invocation can record the address of the point just below as IPb.
function_b();
// <-- IPb
if (_ReturnAddress() == IPa)
{
// Still in the frame created by function_a's call: redirect our own
// return so that it lands on IPb instead of IPa.
*(void**)_AddressOfReturnAddress() = IPb;
return;
}
// Reached via the redirected return: we are now running on function_a's
// stack frame. Execution should have resumed at IPa and run
// fprintf(stdout, "Executed function_b\n"); + '}' (epilogueA),
// but instead runs fprintf(stdout, "Executing function_b\n"); + '}' (epilogueB).
// This is only correct under the assumption epilogueA == epilogueB.
}
else
{
// Called recursively from function_b: just record where we return to
IPb = _ReturnAddress();
return;
}
fprintf(stdout, "Executing function_b\n");
// epilogueB
}
// Unchanged from the exercise apart from the noinline attribute; the IPa and
// epilogueA markers name the two points that function_b's trick refers to.
__declspec(noinline) void function_a(void) {
int beacon = 0x0b1c2d3;
fprintf(stdout, "Executing function_a\n");
function_b();
// <-- IPa
fprintf(stdout, "Executed function_b\n");
// epilogueA
}
// Calls function_a and prints the final line.
int main(void) {
function_a();
fprintf(stdout, "Finished!\n");
return 0;
}
I want my interrupt service routine to use a different stack(may be of its own) & not use the caller thread's stack.
thread_entry (){
do_something();
--> Interrupt occurs
do_otherstuff();
}
void interrupt_routine ()
{
    /* This local is what should NOT end up on the interrupted thread's stack. */
    uint8_t read_byte = hw_read();
}
Is it possible & how to achieve this?
The stacks required for the OS and the interrupt handlers are set up at initialization. This is again architecture-specific code. ARM processors, for example, have a distinct R13 that is used when the processor is in interrupt mode. Again, this register is initialized at boot-up. What is the problem you want to address with this design?
The GNU C library for Linux has methods to control the stack in which the signal executes. Refer to the documentation for full details.
The basic idea is that you allocate memory for the stack and then call the function
sigaltstack()
to specify that this stack is available to be used for signal handling. You then use the
sigaction()
function to register a handler for a particular signal and specify the flag value
SA_ONSTACK
that this handler runs on the special stack
Here is a code snippet showing the pattern, it's "borrowed" from the Linux Programming Interface examples
sigstack.ss_sp = malloc(SIGSTKSZ);
if (sigstack.ss_sp == NULL)
errExit("malloc");
sigstack.ss_size = SIGSTKSZ;
sigstack.ss_flags = 0;
if (sigaltstack(&sigstack, NULL) == -1)
errExit("sigaltstack");
printf("Alternate stack is at %10p-%p\n",
sigstack.ss_sp, (char *) sbrk(0) - 1);
sa.sa_handler = sigsegvHandler; /* Establish handler for SIGSEGV */
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_ONSTACK; /* Handler uses alternate stack */
if (sigaction(SIGSEGV, &sa, NULL) == -1)
errExit("sigaction");
Here's a simple x86 inline assembly implementation. You have a wrapper function which changes the stack, and calls your real routine.
// Dedicated stack for the interrupt routine, and a wrapper that switches to it.
// NOTE(review): using a const variable as an array size is not a constant
// expression in C (it is in C++) - confirm the intended language.
const uint32_t interrupt_stack_size = 4096;
uint8_t interrupt_stack[interrupt_stack_size];
// Saves the current esp, points esp at the dedicated stack, calls
// interrupt_routine(), then restores the saved esp.
// NOTE(review): thread_esp is a single static slot, so a nested or concurrent
// entry would overwrite the saved value - confirm the wrapper cannot be re-entered.
void interrupt_routine_wrap()
{
static int thread_esp;
// Stack grows towards lower addresses, so start at the end (highest address) of the buffer
// NOTE(review): the pointer is stored in an int; this only holds the full address where pointers are 32 bits wide
static int irq_esp = (int) interrupt_stack + interrupt_stack_size;
// Store the old esp
asm mov dword ptr thread_esp, esp;
// Set the new esp
asm mov esp, dword ptr irq_esp;
// Execute the real interrupt routine
interrupt_routine();
// Restore old esp
asm mov esp, dword ptr thread_esp;
}
I'm completely ignoring the segment register here (ss), but different memory models may need to store that along with sp.
You can get rid of the inline assembly by using setjmp/longjmp to read/write all registers. That's a more portable way to do it.
Also note that I'm not preserving any registers here, and inline assembly may confuse the compiler. Perhaps it'd be worth it to add a pusha/popa pair around the wrapper routine. Compiler may do this for you if you specify the function as interrupt. Check the resulting binary to be certain.
OK, I have this benchmark from SPLASH2 which I am using to test a tool which I have created. The benchmark has the following struct.
/* One entry of a singly linked list of interactions (linked through next). */
/* Assigning one Interaction to another copies every field below, whether or not it was initialised. */
typedef struct _interact {
struct _interact *next ; /* Next entry of the list */
Element *destination ; /* Partner of the interaction */
float formfactor_out ; /* Form factor from this patch */
float formfactor_err ; /* Error of FF */
float area_ratio ; /* Area(this) / Area(dest) */
float visibility ; /* Visibility (0 - 1.0) */
} Interaction ;
Looking into the code, I found that area_ratio is never used. However, in the end, I see that the value of area_ratio is not 0, as it is in the beginning. So I placed a watchpoint on this variable, and surprisingly gdb pointed me to a code which modifies visibility (the variable just below the area_ratio).
Now my question is why is this happening. How come area_ratio is modified by modifing visibility. What are the possibilties? Any clue? I'm really puzzled. Note that I'm testing my program on a 64-bit machine. Maybe 64 bit has to do something with it, but I don't know!
The code is something like this:
/* Create links and finish the job */
inter = get_interaction(process_id) ;
*inter = i12 ;
inter->visibility = VISIBILITY_UNDEF ; // <---- This is what gdb is pointing to
Ah I got it! Actually what is happening is that i12 is a local variable, which is not initialized to 0 and when we perform *inter = i12;, the area_ratio of i12 is assigned to *inter and since i12's area_ratio is random and not necessarily 0, that value of area_ratio is assigned to *inter.
And by the way, now I've realized that gdb shows the line number of the one below the intended line, so its not pointing to the line inter->visibility = VISIBILITY_UNDEF, but the line *inter = i12;