counting the '0's in a string in assembly

counting the '0's in a string in assembly - c

I have a little program written with c and assembly. the principle is simple: "count the '0's in a a string given by main.c
so for test0 str0ing 0 it should return 3 because there's 3 '0's in the string
the function itself is made in x86 asm with AT&T syntax, i am given a pointer to the string via C. both main.c and the asm is linked via header file.
this is my code so far and the problem is that it always returns 0. it never reaches conditional jump to increment %eax(to be returned)
// C
#include "asm.h"
char string[] = "2a0 a0 ";
char *ptr1 = string;
int main(){
printf("\nthere are : %d %s in :%s", zero_count(), "0s", string);
printf("\nstring address is: %p\n", ptr1);
return 0;
}
// x86asm
.global ptr1
.section .text
.global zero_count #func()
zero_count:
# prologue
pushl %ebp # save these previous stack frame pointer
movl %esp, %ebp # the stack frame pointer for function
# save registers
#pushl $ebx # needs to be pushed out of stack when used
#pushl %esi # needs to be pushed out of stack when used
#pushl %edi # needs to be pushed out of stack when used
# function body
movl ptr1, %ecx # moves the value of ptr1 to ecx
movl $0, %eax # cleans eax with 0
# loop start
loop_beginning:
cmp $0, (%ecx)
je end
# compare to 'o'
cmp $48, %ecx # 48 is "0" in the asci table
je if_0
increment_pointer:
addl $1, %ecx
jmp loop_beginning
if_0:
addl $1, %eax
jmp increment_pointer
end:
#popl %edi # needs to be popped when used
#popl %esi # needs to be popped when useds
#popl %ebx # needs to be popped when used
# epilogue
movl %ebp, %esp # restore the previous stack pointer("cleaner" the stack)
popl %ebp # restore the previous stack frame pointer
ret #w returns
i apolagize for using global variables in advance, i know it's not good but I'm still learning to use the stack

by switching the cmp to cmpb worked in this string. but i still don't know why. would this operation also worked if this was an int[]?
.global ptr1
.section .text
.global zero_count #func()
zero_count:
# prologue
pushl %ebp # save these previous stack frame pointer
movl %esp, %ebp # the stack frame pointer for function
# save registers
#pushl $ebx # needs to be pushed out of stack when used
#pushl %esi # needs to be pushed out of stack when used
#pushl %edi # needs to be pushed out of stack when used
# function body
mov ptr1, %ecx # moves the value of ptr1 to ecx
movl $0, %eax # cleans eax with 0
# loop start
loop_beginning:
cmpb $0, (%ecx)
je end
# compare to 'o'
cmpb $48, (%ecx) # 48 is "0" in the asci table
je if_0
increment_pointer:
addl $1, %ecx
jmp loop_beginning
if_0:
addl $1, %eax
jmp increment_pointer
# movl (%ecx), %eax
end:
#popl %edi # needs to be popped when used
#popl %esi # needs to be popped when useds
#popl %ebx # needs to be popped when used
# epilogue
movl %ebp, %esp # restore the previous stack pointer("cleaner" the stack)
popl %ebp # restore the previous stack frame pointer
ret #w returns

Related

How does ESP and EBP registers act when a new program is executed?

I recently went through an Assembly language book by Richard Blum wherein there was a subject on the C program to assembly conversion.
Consider the following C program:
#include <stdio.h>
int main(){
int a=100;
int b=25;
if (a>b)
printf("The higher value is %d\n", a);
else
printf("The higher value is %d\n", b);
return 0;
}
when I compiled the above program using -S parameter as:
gcc -S abc.c
I got the following result:
.file "abc.c"
.section .rodata
.LC0:
.string "The higher value is %d\n"
.text
.globl main
.type main, #function
main:
.LFB0:
.cfi_startproc
leal 4(%esp), %ecx
.cfi_def_cfa 1, 0
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
.cfi_escape 0x10,0x5,0x2,0x75,0
movl %esp, %ebp
pushl %ecx
.cfi_escape 0xf,0x3,0x75,0x7c,0x6
subl $20, %esp
movl $100, -16(%ebp)
movl $25, -12(%ebp)
movl -16(%ebp), %eax
cmpl -12(%ebp), %eax
jle .L2
subl $8, %esp
pushl -16(%ebp)
pushl $.LC0
call printf
addl $16, %esp
jmp .L3
.L2:
subl $8, %esp
pushl -12(%ebp)
pushl $.LC0
call printf
addl $16, %esp
.L3:
movl $0, %eax
movl -4(%ebp), %ecx
.cfi_def_cfa 1, 0
leave
.cfi_restore 5
leal -4(%ecx), %esp
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 6.2.0-5ubuntu12) 6.2.0 20161005"
.section .note.GNU-stack,"",#progbits
What I cant understand is this:
Snippet
.LFB0:
.cfi_startproc
leal 4(%esp), %ecx
.cfi_def_cfa 1, 0
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
.cfi_escape 0x10,0x5,0x2,0x75,0
movl %esp, %ebp
pushl %ecx
.cfi_escape 0xf,0x3,0x75,0x7c,0x6
subl $20, %esp
I am unable to predict what is happening with the ESP and EBP register. About EBP, I can understand to an extent that it is used as a local stack and so it's value is saved by pushing onto stack.
Can you please elaborate the above snippet?

This is a special form of function entry-sequence suitable for the main()
function. The compiler knows that main() really is called as main(int argc, char **argv, char **envp), and compiles this function according to that very special behavior. So what's sitting on the stack when this code is reached is four long-size values, in this order: envp, argv, argc, return_address.
So that means that the entry-sequence code is doing something like this
(rewritten to use Intel syntax, which frankly makes a lot more sense
than AT&T syntax):
; Copy esp+4 into ecx. The value at [esp] has the return address,
; so esp+4 is 'argc', or the start of the function's arguments.
lea ecx, [esp+4]
; Round esp down (align esp down) to the nearest 16-byte boundary.
; This ensures that regardless of what esp was before, esp is now
; starting at an address that can store any register this processor
; has, from the one-byte registers all the way up to the 16-byte xmm
; registers
and esp, 0xFFFFFFF0
; Since we copied esp+4 into ecx above, that means that [ecx] is 'argc',
; [ecx+4] is 'argv', and [ecx+8] is 'envp'. For whatever reason, the
; compiler decided to push a duplicate copy of 'argv' onto the function's
; new local frame.
push dword ptr [ecx+4]
; Preserve 'ebp'. The C ABI requires us not to damage 'ebp' across
; function calls, so we save its old value on the stack before we
; change it.
push ebp
; Set 'ebp' to the current stack pointer to set up the function's
; stack frame for real. The "stack frame" is the place on the stack
; where this function will store all its local variables.
mov ebp, esp
; Preserve 'ecx'. Ecx tells us what 'esp' was before we munged 'esp'
; in the 'and'-instruction above, so we'll need it later to restore
; 'esp' before we return.
push ecx
; Finally, allocate space on the stack frame for the local variables,
; 20 bytes worth. 'ebp' points to 'esp' plus 24 by this point, and
; the compiler will use 'ebp-16' and 'ebp-12' to store the values of
; 'a' and 'b', respectively. (So under 'ebp', going down the stack,
; the values will look like this: [ecx, unused, unused, a, b, unused].
; Those unused slots are probably used by the .cfi pseudo-ops for
; something related to exception handling.)
sub esp, 20
At the other end of the function, the inverse operations are used to put
the stack back the way it was before the function was called; it may be
helpful to examine what they're doing as well to understand what's happening
at the beginning:
; Return values are always passed in 'eax' in the x86 C ABI, so set
; 'eax' to the return value of 0.
mov eax, 0
; We pushed 'ecx' onto the stack a while back to save it. This
; instruction pulls 'ecx' back off the stack, but does so without
; popping (which would alter 'esp', which doesn't currently point
; to the right location).
mov ecx, [ebp+4]
; Magic instruction! The 'leave' instruction is designed to shorten
; instruction sequences by "undoing" the stack in a single op.
; So here, 'leave' means specifically to do the following two
; operations, in order: esp = ebp / pop ebp
leave
; 'esp' is now set to what it was before we pushed 'ecx', and 'ebp'
; is back to the value that was used when this function was called.
; But that's still not quite right, so we set 'esp' specifically to
; 'ecx+4', which is the exact opposite of the very first instruction
; in the function.
lea esp, [ecx+4]
; Finally, the stack is back to the way it was when we were called,
; so we can just return.
ret

Segmentation fault when calling assembly function from C code

I'm trying to link assembly functions to a C code for exercise.
Here's my assembly function, written in x86 assembly:
.code32
.section .text
.globl max_function
.type max_function, #function
# i parametri saranno in ordine inverso a partire da 8(%ebp)
max_function:
pushl %ebp # save ebp
movl %esp, %ebp # new frame function
movl $0, %edi # first index is 0
movl 8(%ebp), %ecx # ecx is loaded with the number of elements
cmpl $0, %ecx # check that the number of elements is not 0
je end_function_err #if it is, exit
movl 12(%ebp),%edx # edx is loaded with the array base
movl (%edx), %eax # first element of the array
start_loop:
incl %edi #increment the index
cmpl %edi,%ecx #if it's at the end quit
je loop_exit
movl (%edx,%edi,4),%ebx #pick the value
cmpl %ebx,%eax #compare with actual maximum value
jle start_loop #less equal -> repeat loop
movl %ebx,%eax #greater -> update value
jmp start_loop #repeat loop
loop_exit:
jmp end_function #finish
end_function: #exit operations
movl %ebp, %esp
popl %ebp
ret
end_function_err:
movl $0xffffffff, %eax #return -1 and quit
jmp end_function
It basically defines a function that finds the maximum number of an array (or it should be)
And my C code:
#include <stdio.h>
#include <stdlib.h>
extern int max_function(int size, int* values);
int main(){
int values[] = { 4 , 5 , 7 , 3 , 2 , 8 , 5 , 6 } ;
printf("\nMax value is: %d\n",max_function(8,values));
}
I compile them with gcc -o max max.s max.c.
I get a SegmentationFault when executing the code.
My suspect is that I don't access the value in a right manner, but I can't see why, even because I based my code on an example code that prints argc and argv values when called from the command line.
I'm running Debian 8 64-bit

The problems were:
not preserving %ebx and %edi
not compiling for 32 bit (had to use -m32 flag for gcc)
cmpl operands were inverted
Thanks everybody, problem is solved.
I'll focus more on debugging tools to (disassembling and running step by step was very useful)!

Why this asm code not doubling the value the pointer points

I am trying to interface c code with asm.
But it is not working correctly and I am not able to find the problem.
program.c
#include<stdio.h>
int *asm_multi(int *ptr);
int main()
{
int i=10;
int *p=&i;
asm_multi(p);
printf("%d\n",*p);
return 0;
}
code.asm
.section .text
.global asm_multi
.type asm_multi,#function
asm_multi:
pushl %ebp
movl %esp,%ebp
movl 8(%ebp),%eax
movl %eax,%edx
leal (%edx,%edx,1),%edx
movl %edx,%eax
movl %ebp,%esp
popl %ebp
ret
I am creating the final executable by
as code.asm -o code.o
gcc program.c code.o -o output
./output
The output it prints is :10 whereas I am expecting: 20
What is the problem in the code? Don't consider the efficiency of the program. I have just started asm programming.
I created above code after reading from a more complex example kept at this link. This works perfectly.

You should learn to use a debugger as soon as possible. It not only helps you find bugs, but also allows you to exactly see what the cpu is doing at each instruction and you can compare that to your intentions.
Also, comment your code, especially when asking for help, so we can tell you where the instructions don't match your intentions, if you were unable to do so yourself.
Let's comment your code then:
asm_multi:
pushl %ebp
movl %esp,%ebp
movl 8(%ebp),%eax # fetch first argument, that is p into eax
movl %eax,%edx # edx = p too
leal (%edx,%edx,1),%edx # edx = eax + edx = 2 * p
movl %edx,%eax # eax = edx = 2 * p
movl %ebp,%esp
popl %ebp
ret
As you can see, there are two problems:
You are doubling the pointer not the value it points to
You are not writing it back into memory, just returning it in eax which is then ignored by the C code
A possible fix:
asm_multi:
pushl %ebp
movl %esp,%ebp
movl 8(%ebp),%eax # fetch p
shll $1, (%eax) # double *p by shifting 1 bit to the left
# alternatively
# movl (%eax), %edx # fetch *p
# addl %edx, (%eax) # add *p to *p, doubling it
movl %ebp,%esp
popl %ebp
ret

Infinite loop in my simple assembly program

I am learning loops and jumps in assembly and I try to make a simple loop. I want the printf command to be called 10 times. I have set the counter variable to 1. I have also set %edx to 1 and then I increment it for every iteration. If it is equal to 10, then we should exit the loop. But now the loop is infinite. I have debugged with gdb and %edx seems to be overwritten in the printf function. That is why I push %edx to the stack and the pop it back after the printf call, but it doesn't work. What have I missed?
.section .data
output:
.asciz "Value is %d\n"
val1:
.int 123
counter:
.int 1
.section .text
.globl _start
_start:
nop
movl counter, %edx # start at 1
gohere:
movl val1, %ebx # move value 123 to %ebx
pushl %edx # push %edx to stack
pushl %ebx # push %ebx to stack
pushl $output
call printf # call printf
popl %edx # pop %edx value
inc %edx
cmp $10, %edx # if %edx is less than 10...
jl gohere # ... go to gohere, otherwise exit
movl $0, %ebx
movl $1, %eax
int $0x80

you pushed output as the last push so the first pop will pop output. it is Stack and it is LIFO. in your code output will be in edx after you pop it.
to solve it put two pops before popl edx:
popl output
popl ebx

previous stack variables

I have this problem, I am recursively calling a function in C and C is lexically scoped, so I can only access the current stack frame. I want to extract the arguments and the local variables from the previous stack frame which was created under the previous function call while im on the current stack frame
I know that the values from the previous recursive call are still on the stack, but I cant access access these values because they're "buried" under the active stack frame?
I want to extract the arguments and local variables from the previous stack and copy them to copy_of_buried_arg and copy_of_buried_loc;
It is a requirement to use inline assembly using GAS to extract the variables, this is what I have so far, and I tried all day, I cant seem to figure it out, I drew the stack on paper and did the calculations but nothing is working, I also tried deleting calls to printf so the stack will be cleaner but I cant figure out the right arithmetic. Here is the code so far, my function halts on the second iteration
#include <stdio.h>
char glo = 97; // just for fun 97 is ascii lowercase 'a'
int copy_of_buried_arg;
char copy_of_buried_loc;
void rec(int arg) {
char loc;
loc = glo + arg * 2; // just for fun, some char arithmetic
printf("inside rec() arg=%d loc='%c'\n", arg, loc);
if (arg != 0) {
// after this assembly code runs, the copy_of_buried_arg and
// copy_of_buried_loc variables will have arg, loc values from
// the frame of the previous call to rec().
__asm__("\n\
movl 28(%esp), %eax #moving stack pointer to old ebp (pointing it to old ebp)\n\
addl $8, %eax #now eax points to the first argument for the old ebp \n\
movl (%eax), %ecx #copy the value inside eax to ecx\n\
movl %ecx, copy_of_buried_arg # copies the old argument\n\
\n\
");
printf("copy_of_buried_arg=%u copy_of_buried_loc='%c'\n",
copy_of_buried_arg, copy_of_buried_loc);
} else {
printf("there is no buried stack frame\n");// runs if argument = 0 so only the first time
}
if (arg < 10) {
rec(arg + 1);
}
}
int main (int argc, char **argv) {
rec(0);
return 0;
}

I can try to help, but don't have Linux or assembly in GAS. But the calculations should be similar:
Here's the stack after a couple of calls. A typical stack frame setup creates a linked list of stack frames, where EBP is the current stack frame and points to its old value for the previous stack frame.
+-------+
ESP-> |loc='c'| <- ESP currently points here.
+-------+
EBP-> |oldEBP |--+ <- rec(0)'s call frame
+-------+ |
|retaddr| | <- return value of rec(1)
+-------+ |
|arg=1 | | <- pushed argument of rec(1)
+-------+ |
|loc='a'| | <- local variable of rec(0)
+-------+ |
+--|oldEBP |<-+ <- main's call frame
| +-------+
| |retaddr| <- return value of rec(0)
| +-------+
| |arg=0 | <- pushed argument of rec(0)
| +-------+
\|/
to main's call frame
This is created by the following sequence:
Push arguments last arg first.
Call the function, pushing a return address.
Push soon-to-be old EBP, preserving previous stack frame.
Move ESP (top of stack, containing oldEBP) into EBP, creating new stack frame.
Subtract space for local variables.
This has the effect on a 32-bit stack that EBP+8 will always be the first parameter of the call, EBP+12 the 2nd parameter, etc. EBP-n is always an offset to a local variable.
The code to get the previous loc and arg is then (in MASM):
mov ecx,[ebp] // get previous stack frame
mov edx,[ecx]+8 // get first argument
mov copy_of_buried_arg,edx // save it
mov dl,[ecx]-1 // get first char-sized local variable.
mov copy_of_buried_loc,dl // save it
or my best guess in GAS (I don't know it but know it is backwards to MASM):
movl (%ebp),ecx
movl 8(%ecx),edx
movl edx,copy_of_buried_arg
movb -1(%ecx),dl
movb dl,copy_of_buried_loc
Output of your code with my MASM using VS2010 on Windows:
inside rec() arg=0 loc='a'
there is no buried stack frame
inside rec() arg=1 loc='c'
copy_of_buried_arg=0 copy_of_buried_loc='a'
inside rec() arg=2 loc='e'
copy_of_buried_arg=1 copy_of_buried_loc='c'
inside rec() arg=3 loc='g'
copy_of_buried_arg=2 copy_of_buried_loc='e'
inside rec() arg=4 loc='i'
copy_of_buried_arg=3 copy_of_buried_loc='g'
inside rec() arg=5 loc='k'
copy_of_buried_arg=4 copy_of_buried_loc='i'
inside rec() arg=6 loc='m'
copy_of_buried_arg=5 copy_of_buried_loc='k'
inside rec() arg=7 loc='o'
copy_of_buried_arg=6 copy_of_buried_loc='m'
inside rec() arg=8 loc='q'
copy_of_buried_arg=7 copy_of_buried_loc='o'
inside rec() arg=9 loc='s'
copy_of_buried_arg=8 copy_of_buried_loc='q'
inside rec() arg=10 loc='u'
copy_of_buried_arg=9 copy_of_buried_loc='s'

With my compiler (gcc 3.3.4) I ended up with this:
#include <stdio.h>
char glo = 97; // just for fun 97 is ascii lowercase 'a'
int copy_of_buried_arg;
char copy_of_buried_loc;
void rec(int arg) {
char loc;
loc = glo + arg * 2; // just for fun, some char arithmetic
printf("inside rec() arg=%d loc='%c'\n", arg, loc);
if (arg != 0) {
// after this assembly code runs, the copy_of_buried_arg and
// copy_of_buried_loc variables will have arg, loc values from
// the frame of the previous call to rec().
__asm__ __volatile__ (
"movl 40(%%ebp), %%eax #\n"
"movl %%eax, %0 #\n"
"movb 31(%%ebp), %%al #\n"
"movb %%al, %1 #\n"
: "=m" (copy_of_buried_arg), "=m" (copy_of_buried_loc)
:
: "eax"
);
printf("copy_of_buried_arg=%u copy_of_buried_loc='%c'\n",
copy_of_buried_arg, copy_of_buried_loc);
} else {
printf("there is no buried stack frame\n");// runs if argument = 0 so only the first time
}
if (arg < 10) {
rec(arg + 1);
}
}
int main (int argc, char **argv) {
rec(0);
return 0;
}
Here's the disassembly of the relevant part (get it with gcc file.c -S -o file.s):
_rec:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl 8(%ebp), %eax
addl %eax, %eax
addb _glo, %al
movb %al, -1(%ebp)
subl $4, %esp
movsbl -1(%ebp),%eax
pushl %eax
pushl 8(%ebp)
pushl $LC0
call _printf
addl $16, %esp
cmpl $0, 8(%ebp)
je L2
/APP
movl 40(%ebp), %eax #
movl %eax, _copy_of_buried_arg #
movb 31(%ebp), %al #
movb %al, _copy_of_buried_loc #
/NO_APP
subl $4, %esp
movsbl _copy_of_buried_loc,%eax
pushl %eax
pushl _copy_of_buried_arg
pushl $LC1
call _printf
addl $16, %esp
jmp L3
L2:
subl $12, %esp
pushl $LC2
call _printf
addl $16, %esp
L3:
cmpl $9, 8(%ebp)
jg L1
subl $12, %esp
movl 8(%ebp), %eax
incl %eax
pushl %eax
call _rec
addl $16, %esp
L1:
leave
ret
Those offsets from ebp (40 and 31) initially were set to an arbitrary guess value (e.g. 0) and then refined through observation of the disassembly and some simple calculations.
Note that the function uses extra 12+4=16 bytes of stack for the alignment and the parameter when it calls itself recursively:
subl $12, %esp
movl 8(%ebp), %eax
incl %eax
pushl %eax
call _rec
addl $16, %esp
There are also 4 bytes of the return address.
And then the function uses 4+8=12 bytes for the old ebp and its local variables:
_rec:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
So, in total the stack grows by 16+4+12=32 bytes with each recursive call.
Now, we know how to get our local arg and loc through ebp:
movl 8(%ebp), %eax ; <- arg
addl %eax, %eax
addb _glo, %al
movb %al, -1(%ebp) ; <- loc
So, we just add 32 to those offsets 8 and -1 and arrive at 40 and 31.
Do the same and you'll get your "buried" variables.

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

counting the '0's in a string in assembly - c

Related

How does ESP and EBP registers act when a new program is executed?

Segmentation fault when calling assembly function from C code

Why this asm code not doubling the value the pointer points

Infinite loop in my simple assembly program

previous stack variables

Categories

Resources