previous stack variables - c

I have this problem, I am recursively calling a function in C and C is lexically scoped, so I can only access the current stack frame. I want to extract the arguments and the local variables from the previous stack frame which was created under the previous function call while im on the current stack frame
I know that the values from the previous recursive call are still on the stack, but I can't access them because they're "buried" under the active stack frame.
I want to extract the arguments and local variables from the previous stack and copy them to copy_of_buried_arg and copy_of_buried_loc;
It is a requirement to use inline assembly using GAS to extract the variables, this is what I have so far, and I tried all day, I cant seem to figure it out, I drew the stack on paper and did the calculations but nothing is working, I also tried deleting calls to printf so the stack will be cleaner but I cant figure out the right arithmetic. Here is the code so far, my function halts on the second iteration
#include <stdio.h>
char glo = 97; // just for fun 97 is ascii lowercase 'a'
int copy_of_buried_arg;
char copy_of_buried_loc;
void rec(int arg) {
char loc;
loc = glo + arg * 2; // just for fun, some char arithmetic
printf("inside rec() arg=%d loc='%c'\n", arg, loc);
if (arg != 0) {
// after this assembly code runs, the copy_of_buried_arg and
// copy_of_buried_loc variables will have arg, loc values from
// the frame of the previous call to rec().
__asm__("\n\
movl 28(%esp), %eax #moving stack pointer to old ebp (pointing it to old ebp)\n\
addl $8, %eax #now eax points to the first argument for the old ebp \n\
movl (%eax), %ecx #copy the value inside eax to ecx\n\
movl %ecx, copy_of_buried_arg # copies the old argument\n\
\n\
");
printf("copy_of_buried_arg=%u copy_of_buried_loc='%c'\n",
copy_of_buried_arg, copy_of_buried_loc);
} else {
printf("there is no buried stack frame\n");// runs if argument = 0 so only the first time
}
if (arg < 10) {
rec(arg + 1);
}
}
int main (int argc, char **argv) {
rec(0);
return 0;
}

I can try to help, but don't have Linux or assembly in GAS. But the calculations should be similar:
Here's the stack after a couple of calls. A typical stack frame setup creates a linked list of stack frames, where EBP is the current stack frame and points to its old value for the previous stack frame.
+-------+
ESP-> |loc='c'| <- ESP currently points here.
+-------+
EBP-> |oldEBP |--+ <- rec(0)'s call frame
+-------+ |
|retaddr| | <- return address of rec(1)
+-------+ |
|arg=1 | | <- pushed argument of rec(1)
+-------+ |
|loc='a'| | <- local variable of rec(0)
+-------+ |
+--|oldEBP |<-+ <- main's call frame
| +-------+
| |retaddr| <- return address of rec(0)
| +-------+
| |arg=0 | <- pushed argument of rec(0)
| +-------+
\|/
to main's call frame
This is created by the following sequence:
Push arguments last arg first.
Call the function, pushing a return address.
Push soon-to-be old EBP, preserving previous stack frame.
Move ESP (top of stack, containing oldEBP) into EBP, creating new stack frame.
Subtract space for local variables.
This has the effect on a 32-bit stack that EBP+8 will always be the first parameter of the call, EBP+12 the 2nd parameter, etc. EBP-n is always an offset to a local variable.
The code to get the previous loc and arg is then (in MASM):
mov ecx, dword ptr [ebp] // ecx = saved EBP, i.e. the caller's frame pointer
mov edx, dword ptr [ecx+8] // first argument of the previous frame (frame + 8)
mov copy_of_buried_arg, edx // store it in the global
mov dl, byte ptr [ecx-1] // first char-sized local of the previous frame (frame - 1)
mov copy_of_buried_loc, dl // store it in the global
or my best guess in GAS (I don't know it but know it is backwards to MASM):
movl (%ebp), %ecx # ecx = saved EBP, i.e. the previous stack frame
movl 8(%ecx), %edx # edx = first argument of the previous frame
movl %edx, copy_of_buried_arg # save it
movb -1(%ecx), %dl # dl = first char-sized local of the previous frame
movb %dl, copy_of_buried_loc # save it
Output of your code with my MASM using VS2010 on Windows:
inside rec() arg=0 loc='a'
there is no buried stack frame
inside rec() arg=1 loc='c'
copy_of_buried_arg=0 copy_of_buried_loc='a'
inside rec() arg=2 loc='e'
copy_of_buried_arg=1 copy_of_buried_loc='c'
inside rec() arg=3 loc='g'
copy_of_buried_arg=2 copy_of_buried_loc='e'
inside rec() arg=4 loc='i'
copy_of_buried_arg=3 copy_of_buried_loc='g'
inside rec() arg=5 loc='k'
copy_of_buried_arg=4 copy_of_buried_loc='i'
inside rec() arg=6 loc='m'
copy_of_buried_arg=5 copy_of_buried_loc='k'
inside rec() arg=7 loc='o'
copy_of_buried_arg=6 copy_of_buried_loc='m'
inside rec() arg=8 loc='q'
copy_of_buried_arg=7 copy_of_buried_loc='o'
inside rec() arg=9 loc='s'
copy_of_buried_arg=8 copy_of_buried_loc='q'
inside rec() arg=10 loc='u'
copy_of_buried_arg=9 copy_of_buried_loc='s'

With my compiler (gcc 3.3.4) I ended up with this:
#include <stdio.h>
char glo = 97; // just for fun 97 is ascii lowercase 'a'
int copy_of_buried_arg;
char copy_of_buried_loc;
/*
 * rec - recursive demo. Prints its own arg and loc, and on every call except
 * the first (arg == 0) copies the previous call's arg and loc out of the
 * stack into the globals copy_of_buried_arg / copy_of_buried_loc.
 *
 * arg: recursion depth; the function calls itself with arg + 1 while arg < 10.
 */
void rec(int arg) {
char loc;
loc = glo + arg * 2; // just for fun, some char arithmetic
printf("inside rec() arg=%d loc='%c'\n", arg, loc);
if (arg != 0) {
// after this assembly code runs, the copy_of_buried_arg and
// copy_of_buried_loc variables will have arg, loc values from
// the frame of the previous call to rec().
//
// The offsets 40 and 31 are NOT portable: they were worked out from the
// disassembly produced by this particular build (gcc 3.3.4). In that
// output the current frame keeps arg at 8(%ebp) and loc at -1(%ebp), and
// each recursive call grows the stack by 32 bytes, so the previous call's
// arg and loc are found at 8+32 = 40 and -1+32 = 31 from the current %ebp.
// The "=m" outputs store directly into the two globals; eax is listed as
// clobbered because it is the scratch register used for both copies.
__asm__ __volatile__ (
"movl 40(%%ebp), %%eax #\n"
"movl %%eax, %0 #\n"
"movb 31(%%ebp), %%al #\n"
"movb %%al, %1 #\n"
: "=m" (copy_of_buried_arg), "=m" (copy_of_buried_loc)
:
: "eax"
);
printf("copy_of_buried_arg=%u copy_of_buried_loc='%c'\n",
copy_of_buried_arg, copy_of_buried_loc);
} else {
printf("there is no buried stack frame\n");// runs if argument = 0 so only the first time
}
// Recurse until arg reaches 10, so rec() runs for arg = 0..10.
if (arg < 10) {
rec(arg + 1);
}
}
// Starts the recursion at depth 0; argc and argv are not used.
int main (int argc, char **argv) {
rec(0);
return 0;
}
Here's the disassembly of the relevant part (get it with gcc file.c -S -o file.s):
_rec:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl 8(%ebp), %eax
addl %eax, %eax
addb _glo, %al
movb %al, -1(%ebp)
subl $4, %esp
movsbl -1(%ebp),%eax
pushl %eax
pushl 8(%ebp)
pushl $LC0
call _printf
addl $16, %esp
cmpl $0, 8(%ebp)
je L2
/APP
movl 40(%ebp), %eax #
movl %eax, _copy_of_buried_arg #
movb 31(%ebp), %al #
movb %al, _copy_of_buried_loc #
/NO_APP
subl $4, %esp
movsbl _copy_of_buried_loc,%eax
pushl %eax
pushl _copy_of_buried_arg
pushl $LC1
call _printf
addl $16, %esp
jmp L3
L2:
subl $12, %esp
pushl $LC2
call _printf
addl $16, %esp
L3:
cmpl $9, 8(%ebp)
jg L1
subl $12, %esp
movl 8(%ebp), %eax
incl %eax
pushl %eax
call _rec
addl $16, %esp
L1:
leave
ret
Those offsets from ebp (40 and 31) initially were set to an arbitrary guess value (e.g. 0) and then refined through observation of the disassembly and some simple calculations.
Note that the function uses extra 12+4=16 bytes of stack for the alignment and the parameter when it calls itself recursively:
subl $12, %esp
movl 8(%ebp), %eax
incl %eax
pushl %eax
call _rec
addl $16, %esp
There are also 4 bytes of the return address.
And then the function uses 4+8=12 bytes for the old ebp and its local variables:
_rec:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
So, in total the stack grows by 16+4+12=32 bytes with each recursive call.
Now, we know how to get our local arg and loc through ebp:
movl 8(%ebp), %eax # <- arg
addl %eax, %eax
addb _glo, %al
movb %al, -1(%ebp) # <- loc
So, we just add 32 to those offsets 8 and -1 and arrive at 40 and 31.
Do the same and you'll get your "buried" variables.

Related

counting the '0's in a string in assembly

I have a little program written with c and assembly. the principle is simple: "count the '0's in a a string given by main.c
so for test0 str0ing 0 it should return 3 because there's 3 '0's in the string
the function itself is made in x86 asm with AT&T syntax, i am given a pointer to the string via C. both main.c and the asm is linked via header file.
this is my code so far and the problem is that it always returns 0. it never reaches conditional jump to increment %eax(to be returned)
// C
#include "asm.h"
char string[] = "2a0 a0 ";
char *ptr1 = string;
int main(){
printf("\nthere are : %d %s in :%s", zero_count(), "0s", string);
printf("\nstring address is: %p\n", ptr1);
return 0;
}
// x86asm
.global ptr1
.section .text
.global zero_count #func()
zero_count:
# prologue
pushl %ebp # save these previous stack frame pointer
movl %esp, %ebp # the stack frame pointer for function
# save registers
#pushl $ebx # needs to be pushed out of stack when used
#pushl %esi # needs to be pushed out of stack when used
#pushl %edi # needs to be pushed out of stack when used
# function body
movl ptr1, %ecx # moves the value of ptr1 to ecx
movl $0, %eax # cleans eax with 0
# loop start
loop_beginning:
cmp $0, (%ecx)
je end
# compare to 'o'
cmp $48, %ecx # 48 is "0" in the asci table
je if_0
increment_pointer:
addl $1, %ecx
jmp loop_beginning
if_0:
addl $1, %eax
jmp increment_pointer
end:
#popl %edi # needs to be popped when used
#popl %esi # needs to be popped when useds
#popl %ebx # needs to be popped when used
# epilogue
movl %ebp, %esp # restore the previous stack pointer("cleaner" the stack)
popl %ebp # restore the previous stack frame pointer
ret #w returns
I apologize in advance for using global variables; I know it's not good practice, but I'm still learning to use the stack.
Switching the cmp to cmpb made it work for this string, but I still don't know why. Would this operation also work if this was an int[]?
#-----------------------------------------------------------------------
# zero_count - count the '0' characters (byte value 48) in a C string.
# Syntax: GAS / AT&T, 32-bit x86.
# In:     global ptr1 = address of a NUL-terminated string
# Out:    eax = number of bytes equal to 48 before the terminating 0 byte
# Clobb:  ecx, flags
# NOTE(review): called from C as zero_count() and printed with %d; the
# prototype lives in asm.h, which is not shown here.
#-----------------------------------------------------------------------
.global ptr1
.section .text
.global zero_count #func()
zero_count:
# prologue
pushl %ebp # save the caller's stack frame pointer
movl %esp, %ebp # set up this function's stack frame pointer
# save registers
#pushl %ebx # would have to be saved here if the body used it
#pushl %esi # would have to be saved here if the body used it
#pushl %edi # would have to be saved here if the body used it
# function body
mov ptr1, %ecx # ecx = value of ptr1, i.e. the address of the string
movl $0, %eax # eax = 0: running count of '0' characters
# loop start
loop_beginning:
cmpb $0, (%ecx) # terminating NUL byte? cmpb compares exactly one byte at (%ecx)
je end
# compare to '0'
cmpb $48, (%ecx) # 48 is '0' in the ASCII table; the b suffix gives the memory operand its size
je if_0
increment_pointer:
addl $1, %ecx # advance to the next character
jmp loop_beginning
if_0:
addl $1, %eax # found a '0': bump the count
jmp increment_pointer
# movl (%ecx), %eax
end:
#popl %edi # would have to be restored here if it was saved above
#popl %esi # would have to be restored here if it was saved above
#popl %ebx # would have to be restored here if it was saved above
# epilogue
movl %ebp, %esp # restore the stack pointer (discards anything left on this frame)
popl %ebp # restore the caller's stack frame pointer
ret # return to the caller; the count is in eax

Writing assembly Language in C [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 8 years ago.
Improve this question
I am writing assembly language in C. I was given the following assembly code:
fn:
pushl %ebp
movl %esp, %ebp
subl $16, %esp
movl $1, -4(%ebp)
jmp .f2
.f3:
movl -4(%ebp), %eax
imull 8(%ebp), %eax
movl %eax, -4(%ebp)
subl $1, 8(%ebp)
.f2:
cmpl $1, 8(%ebp)
jg .f3
movl -4(%ebp), %eax
leave
ret
I have written my code in C below:
fn (int x)
{
int y = 1;
if(x > 1):
int z = y *= x
x – 1;
return z;
}
Can someone tell me if I am on the right track with my C code? or if I am off and if I am can you point me in the right direction.
Thank You in advanced
fn:
pushl %ebp
movl %esp, %ebp
This is the usual function prologue used in the __cdecl calling convention: save the existing %ebp to the stack, then copy %esp (the current stack address) into %ebp. From now on %ebp is this function's reference point on the stack, used by the assembly code to access both parameters and local variables. To conclude the function, the opposite is done at the end, along with placing the "return" value in %eax.
At this point you can assume %ebp points to the stack position where the previous %ebp is stored, %ebp + 4 holds the function's return address, and everything from %ebp + 8 upward is the function's parameters. Negative offsets access the stack space below %ebp, where the function can store its local variables.
subl $16, %esp
This reserves 16 bytes on the stack for local variables. It could be 16 variables of 1 byte or one variable of 16 bytes; there is no way to know from this instruction alone. Some of those bytes may be unused padding reserved for stack alignment.
It's important to change the value of %esp at this point so that any later push, pop or call won't overwrite this function's local variables.
movl $1, -4(%ebp)
Here it is possible to assume that the first of the local variables present in this function is a 32-bit integer at the address %ebp - 4. Its value was just set to 1. Let's call this variable i.
jmp .f2
.f3:
movl -4(%ebp), %eax
imull 8(%ebp), %eax
Based on what this imull is doing we can assume the function takes at least one parameter at the address %ebp + 8, and it seems its a 32-bits integer being multiplied by i. Lets call this parameter x.
movl %eax, -4(%ebp)
So far we can assume i = x * i.
subl $1, 8(%ebp)
And now x = x - 1.
.f2:
cmpl $1, 8(%ebp)
jg .f3
Here we are checking if x is greater than 1. If true, jumps to f3, that is a backward jump therefore it seems we have a loop going on.
movl -4(%ebp), %eax
leave
ret
Here outside the loop we see it set %eax to i and terminate the function, we can interpret it as return i.
Compiling the analyzed information together, we can assume the original function must have looked somewhat like this:
/*
 * fn - C reconstruction of the disassembled routine.
 *
 * Multiplies an accumulator (initially 1) by x, x-1, ..., 2 and returns the
 * product, i.e. the factorial of x for x >= 1. When x <= 1 the loop body
 * never runs and the function returns 1.
 *
 * In the assembly, i is the local at -4(%ebp) and x is the parameter at
 * 8(%ebp); the parameter itself is decremented in place.
 */
int fn(int x)
{
int i = 1; /* movl $1, -4(%ebp) */
while (x > 1) /* cmpl $1, 8(%ebp) ; jg .f3 */
{
i = i * x; // or just i *= x, same thing
x = x - 1; // or just x--, same thing
}
return i; /* movl -4(%ebp), %eax */
}

C Code represented as Assembler Code - How to interpret?

I got this short C Code.
#include <stdint.h>
uint64_t multiply(uint32_t x, uint32_t y) {
uint64_t res;
res = x*y;
return res;
}
int main() {
uint32_t a = 3, b = 5, z;
z = multiply(a,b);
return 0;
}
There is also an Assembler Code for the given C code above.
I don't understand everything of that assembler code. I commented each line and you will find my question in the comments for each line.
The Assembler Code is:
.text
multiply:
pushl %ebp // stores the stack frame of the calling function on the stack
movl %esp, %ebp // takes the current stack pointer and uses it as the frame for the called function
subl $16, %esp // it leaves room on the stack, but why 16Bytes. sizeof(res) = 8Bytes
movl 8(%ebp), %eax // I don't know quite what "8(%ebp) mean? It has to do something with res, because
imull 12(%ebp), %eax // here is the multiplication done. And again "12(%ebp).
movl %eax, -8(%ebp) // Now, we got a negative number in front of. How to interpret this?
movl $0, -4(%ebp) // here as well
movl -8(%ebp), %eax // and here again.
movl -4(%ebp), %edx // also here
leave
ret
main:
pushl %ebp // stores the stack frame of the calling function on the stack
movl %esp, %ebp // // takes the current stack pointer and uses it as the frame for the called function
andl $-8, %esp // what happens here and why?
subl $24, %esp // here, it leaves room for local variables, but why 24 bytes? a, b, c: the size of each of them is 4 Bytes. So 3*4 = 12
movl $3, 20(%esp) // 3 gets pushed on the stack
movl $5, 16(%esp) // 5 also get pushed on the stack
movl 16(%esp), %eax // what does 16(%esp) mean and what happened with z?
movl %eax, 4(%esp) // we got the here as well
movl 20(%esp), %eax // and also here
movl %eax, (%esp) // what does happen in this line?
call multiply // thats clear, the function multiply gets called
movl %eax, 12(%esp) // it looks like the same as two lines before, except it contains the number 12
movl $0, %eax // I suppose, this line is because of "return 0;"
leave
ret
Negative references relative to %ebp are for local variables on the stack.
movl 8(%ebp), %eax // I don't know quite what "8(%ebp) mean? It has to do something with res, because`
%eax = x
imull 12(%ebp), %eax // here is the multiplication done. And again "12(%ebp).
%eax = %eax * y
movl %eax, -8(%ebp) // Now, we got a negative number in front of. How to interpret this?
(u_int32_t)res = %eax // sets low 32 bits of res
movl $0, -4(%ebp) // here as well
clears upper 32 bits of res to extend 32-bit multiplication result to uint64_t
movl -8(%ebp), %eax // and here again.
movl -4(%ebp), %edx // also here
return res; //64-bit results are returned as a pair of 32-bit registers %edx:%eax
As for the main, see x86 calling convention which may help making sense of what happens.
andl $-8, %esp // what happens here and why?
stack boundary is aligned by 8. I believe it's ABI requirement
subl $24, %esp // here, it leaves room for local variables, but why 24 bytes? a, b, c: the size of each of them is 4 Bytes. So 3*4 = 12
Multiples of 8 (probably due to alignment requirements)
movl $3, 20(%esp) // 3 gets pushed on the stack
a = 3
movl $5, 16(%esp) // 5 also get pushed on the stack
b = 5
movl 16(%esp), %eax // what does 16(%esp) mean and what happened with z?
%eax = b
z is at 12(%esp) and is not used yet.
movl %eax, 4(%esp) // we got the here as well
put b on the stack (second argument to multiply())
movl 20(%esp), %eax // and also here
%eax = a
movl %eax, (%esp) // what does happen in this line?
put a on the stack (first argument to multiply())
call multiply // thats clear, the function multiply gets called
multiply returns 64-bit result in %edx:%eax
movl %eax, 12(%esp) // it looks like the same as two lines before, except it contains the number 12
z = (uint32_t) multiply()
movl $0, %eax // I suppose, this line is because of "return 0;"
yup. return 0;
Arguments are pushed onto the stack when the function is called. Inside the function, the stack pointer at that time is saved as the base pointer. (You got that much already.) The base pointer is used as a fixed location from which to reference arguments (which are above it, hence the positive offsets) and local variables (which are below it, hence the negative offsets).
The advantage of using a base pointer is that it is stable throughout the entire function, even when the stack pointer changes (due to function calls and new scopes).
So 8(%ebp) is one argument, and 12(%ebp) is the other.
The code is likely using more space on the stack than it needs to, because it is using temporary variables that could be optimized out if you had optimization turned on.
You might find this helpful: http://en.wikibooks.org/wiki/X86_Disassembly/Functions_and_Stack_Frames
I started typing this as a comment but it was getting too long to fit.
You can compile your example with -masm=intel so the assembly is more readable. Also, don't confuse the push and pop instructions with mov. push always decrements esp before storing the value at the new top of the stack, and pop always increments esp after loading the value from the top of the stack, whereas mov leaves esp unchanged.
There are two ways to store values onto the stack. You can either push each item onto it one item at a time or you can allocate up-front the space required and then load each value onto the stackslot using mov + relative offset from either esp or ebp.
In your example, gcc chose the second method since that's usually faster because, unlike the first method, you're not constantly adjusting esp before saving each value onto the stack.
To address your other question in comment, x86 instruction set does not have a mov instruction for copying values from memory location a to another memory location b directly. It is not uncommon to see code like:
mov eax, [esp+16]
mov [esp+4], eax
mov eax, [esp+20]
mov [esp], eax
call multiply(unsigned int, unsigned int)
mov [esp+12], eax
Register eax is being used as an intermediate temporary variable to help copy data between the two stack locations. You can mentally translate the above as:
esp[4] = esp[16]; // argument 2
esp[0] = esp[20]; // argument 1
call multiply
esp[12] = eax; // eax has return value
Here's what the stack approximately looks like right before the call to multiply:
lower addr esp => uint32_t:a_copy = 3 <--. arg1 to 'multiply'
esp + 4 uint32_t:b_copy = 5 <--. arg2 to 'multiply'
^ esp + 8 ????
^ esp + 12 uint32_t:z = ? <--.
| esp + 16 uint32_t:b = 5 | local variables in 'main'
| esp + 20 uint32_t:a = 3 <--.
| ...
| ...
higher addr ebp previous frame

C push & pop test

Could someone please help me understand what the push_test() and pop_test() methods are attempting to test? These methods are attempting to test something and I can't figure out what that is.
The following code is written in c:
int push_test() {
int ret_val;
/* movl %esp, %eax ;; save stack pointer
pushl %esp ;; push stack pointer onto stack
popl %edx ;; pop the stack into %edx
subl %edx, %eax ;; subtract the two values
movl %eax, ret_val ;; set up the return value
*/
asm("movl %%esp, %%eax; pushl %%esp; popl %%edx; subl %%edx, %%eax; movl %%eax, %0"
: "=r" (ret_val)
: /* no input */
: "%edx", "%eax");
return ret_val;
}
int pop_test() {
int ret_val = 0xffffffff; /* -1 in decimal */
/* pushl ret_val ;; save ret_val on the stack
movl %esp, %edx ;; save the stack pointer
popl %esp ;; pop stack into the stack pointer
movl %esp, ret_val ;; set the popped value as the return value
movl %edx, %esp ;; restore original stack pointer
*/
asm("pushl %1; movl %%esp, %%edx; popl %%esp; movl %%esp, %0; movl %%edx, %%esp"
: "=r" (ret_val)
: "r" (ret_val)
: "%edx");
return ret_val;
}
int main() {
printf("push test: %d\n", push_test());
printf("pop test: %d\n", pop_test());
}
/* Output:
push test: 0
pop test: -1
*/
Your push_test() and pop_test() is saving the stack state, destroying the stack frame,
and then doing an operation based on values on the stack.
Let's go through each instruction of pop_test() and figure out what it does (push_test() is very similar in operation).
pushl ret_val pushes -1 onto the stack, decrementing the stack pointer (%esp) by 4, so right now your stack looks like: {-1}.
movl %esp, %edx copies your stack pointer into %edx, so %edx contains the memory address of position 1 on the stack, so right now your stack looks like : {-1}, %esp: stack[1], %edx : stack[1]
popl %esp pops -1 and stores it into %esp, so the stack looks like: {}, %esp: -1, %edx:stack[1]
movl %esp, ret_val takes the value of %esp, currently -1, and moves it into ret_val, so ret_val becomes -1.
finally movl %edx, %esp puts the value of %edx back in %esp and returns -1.
This method always returns -1. The idea is to push a value onto the stack, pop it back off, and see if the value stays the same. It also destroys and reforms the stack (by temporarily destroying and then restoring %esp). I would guess this is probably some learning assembly kind of deal and not an actual testing method.

Analyzing the assembly code generated to manipulate command line arguments

#include <stdio.h>
int main(int argc, char * argv[])
{
argv[1][2] = 'A';
return 0;
}
Here is the corresponding assembly code from GCC for a 32-bit Intel architecture. I can't totally understand what is going on.
main:
leal 4(%esp), %ecx - Add 4 to esp and store the address in ecx
andl $-16, %esp - Store first 28 bits from esp's address into esp??
pushl -4(%ecx) - Push the old esp on stack
pushl %ebp - Preamble
movl %esp, %ebp
pushl %ecx - push old esp + 4 on stack
movl 4(%ecx), %eax - move ecx + 4 to eax. this is the address of argv. argc stored at (%ecx).
addl $4, %eax - argv[1]
movl (%eax), %eax - argv[1][0]
addl $2, %eax - argv[1][2]
movb $65, (%eax) - move 'A'
movl $0, %eax - move return value (0)
popl %ecx - get old value of ecx
leave
leal -4(%ecx), %esp - restore esp
ret
What is going on in the beginning of the code before the preamble? Where is argv store according to the following code? On the stack?
The funny code (the first two lines) that you are seeing is the alignment of the stack to 16 bytes (-16 is the same as ~15, and x & ~15 rounds x down to a multiple of 16).
argv would be stored at ESP + 8 when entering the function, what leal 4(%esp), %ecx does is create a pointer to a pseudo-struct containing argc and argv, then it proceeds to access them from there. movl 4(%ecx), %eax access argv from this pseudo-struct.
argv is a parameter to "main()", so in many ABIs, it will indeed be passed on the stack.

Resources