I'm stuck with this. I'm self studying assenbler and translating some basics instructions. But i can't with this one.
Can anyone help me, please?
int
secuencia ( int n, EXPRESION * * o )
{
int a, i;
for ( i = 0; i < n; i++ ){
a = evaluarExpresion( *o );
// Im trying to do this: o++;
__asm {
mov eax,dword ptr [o]
mov ecx,dword ptr [eax]
inc [ecx]
}
}
return a ;
}
I wrote the inside for and works, but still don't know how to increment O
int
secuencia ( int n, EXPRESION * * o )
{
int a, i;
for ( i = 0; i < n; i++ ){
__asm {
mov eax,dword ptr [o]
mov ecx,dword ptr [eax]
push ebp
mov ebp, esp
push ecx
call evaluarExpresion
mov esp, ebp
pop ebp
mov a, eax
}
o++;
}
return a ;
}
mov esi, o
add esi, 4 //increment is here
Line1 : We move your o pointer to the esi register.
Line2: We increment your o pointer
or
mov eax, o
mov esi, [eax]
add esi, 4
I don't understand perfectly what you are trying to do but I hope it helped!
There are two options:
Either:
move the value of o from memory into a register (eax, for example)
increment the register
move the value from the register back to memory
or
increment the value stored in memory directly
Try to use both methods.
Related
I'm trying to get the hang of Assembly for class. So for this C code:
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
b += a;
*aPtr = *aPtr + *bPtr; //dereference
printf(“aPtr points to value: %d\n”, *aPtr);
*** Updated
I tried this in Assembly:
.data
var1 DWORD 10
var2 DWORD 20
var3 DWORD ?
.code
main PROC
mov eax, 10
mov ebx, 20
add ebx, eax
mov var3, ebx
mov eax, offset var1
mov ebx, offset var3
mov ecx, [eax]
mov edx, [ebx]
add ecx, edx
mov var3, ecx
INVOKE ExitProcess, 0
main endp
end
But I know that the pointers can't simply be deferenced and added together like that. We also can't use lea, so I'm at a loss on how to add a dereferenced value to another dereferenced value in Assembly; I'm also not sure how I would convert the printf statement correctly. Could I get some help?
Your code is not yet updating the a and b variables with the results from the operations.
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
a SDWORD 10
b SDWORD 20
aPtr DWORD offset a
bPtr DWORD offset b
b += a;
mov eax, a
add b, eax ; Result in b (30)
*aPtr = *aPtr + *bPtr;
mov edi, aPtr
mov esi, bPtr
mov eax, [edi]
add eax, [esi]
mov [edi], eax ; Result in a (40)
printf(“aPtr points to value: %d\n”, *aPtr);
msg db 'aPtr points to value: %d\n', 0
...
mov edi, aPtr
mov eax, [edi]
push eax
push offset msg
call _printf
add esp, 8
I'm trying to learn some basic x86 32-bit assembly programming. So in pursuing this I decided to implement quicksort in assembly (sorting only integers). First I made a C-version of the sorting function and then I made an assembly version.
However, when comparing my assembly version with the my C-version (compiled with gcc on Debian), the C-version performs more then 10 times faster on a array of 10000 integers.
So my question is if anybody can give some feedback on obvious optimizations that can be made on my quick sort assembly routine. It's purely for educational purposes and I'm not expecting to beat the compiler makers in terms of producing high speed code but I'm interested in knowing if I'm making any obvious mistakes that hampers speed.
The C-version:
void myqsort(int* elems, int sidx, int eidx)
{
if (sidx < eidx)
{
int pivot = elems[eidx];
int i = sidx;
for (int j = sidx; j < eidx; j++)
{
if (elems[j] <= pivot)
{
swap(&elems[i], &elems[j]);
i = i + 1;
}
}
swap(&elems[i], &elems[eidx]);
myqsort(elems, sidx, i - 1);
myqsort(elems, i + 1, eidx);
}
}
void swap(int* a, int* b)
{
int tmp = *a;
*a = *b;
*b = tmp;
}
Assembly version (NASM):
;
; void asm_quick_sort(int* elems, int startindex, int endindex)
; Params:
; elems - pointer to elements to sort - [ebp + 0x8]
; sid - start index of items - [ebp + 0xC]
; eid - end index of items - [ebp + 0x10]
asm_quick_sort:
push ebp
mov ebp, esp
push edi
push esi
push ebx
mov eax, dword [ebp + 0xC] ; store start index, = i
mov ebx, dword [ebp + 0x10] ; store end index
mov esi, dword [ebp + 0x8] ; store pointer to first element in esi
cmp eax, ebx
jnl qsort_done
mov ecx, eax ; ecx = j, = sid
mov edx, dword [esi + (0x4 * ebx)] ; pivot element, elems[eid], edx = pivot
qsort_part_loop:
; for j = sid; j < eid; j++
cmp ecx, ebx ; if ecx < end index
jnb qsort_end_part
; if elems[j] <= pivot
cmp edx, dword [esi + (0x4*ecx)]
jb qsort_cont_loop
; do swap, elems[i], elems[j]
push edx ; save pivot for now
mov edx, dword [esi + (0x4*ecx)] ; edx = elems[j]
mov edi, dword [esi + (0x4*eax)] ; edi = elems[i]
mov dword [esi + (0x4*eax)], edx ; elems[i] = elems[j]
mov dword [esi + (0x4*ecx)], edi ; elems[j] = elems[i]
pop edx ; restore pivot
; i++
add eax, 0x1
qsort_cont_loop:
add ecx, 0x1
jmp qsort_part_loop
qsort_end_part:
; do swap, elems[i], elems[eid]
mov edx, dword [esi + (0x4*eax)] ; edx = elems[i]
mov edi, dword [esi + (0x4*ebx)] ; edi = elems[eid]
mov dword [esi + (0x4*ebx)], edx ; elems[eidx] = elems[i]
mov dword [esi + (0x4*eax)], edi ; elems[i] = elems[eidx]
; qsort(elems, sid, i - 1)
; qsort(elems, i + 1, eid)
sub eax, 0x1
push eax
push dword [ebp + 0xC] ; push start idx
push dword [ebp + 0x8] ; push elems vector
call asm_quick_sort
add esp, 0x8
pop eax
add eax, 0x1
push dword [ebp + 0x10] ; push end idx
push eax
push dword [ebp + 0x8] ; push elems vector
call asm_quick_sort
add esp, 0xC
qsort_done:
pop ebx
pop esi
pop edi
mov esp, ebp
pop ebp
ret
I call the assembly routine from C and I use clock() for timing the routines.
EDIT
The difference in performance is no longer an issue after correcting the bugs pointed out by my fellow stackoverflowers.
You have an error in your assembly sort implementation, and speed comparisons are useless until you resolve it. The problem is the recursive call:
myqsort(elems, sidx, i - 1);
Seeing as it is not necessarily the case that i is not sidx, this might pass a value less than sidx to the function, including -1 if sidx is 0. This is handled in your C implementation:
if (sidx < eidx)
But in your assembly version:
cmp eax, ebx
jae qsort_done
That's an unsigned comparison branch instruction! You should be using jge. I see a segfault due to this problem. When fixed, the performance of both implementations appears to be roughly the same according to my quick tests (compiling with -O3). I used the following test driver:
#include <stdlib.h>
#include <stdio.h>
void myqsort(int * elems, int sidx, int eidx);
#define SIZE 100000
int main(int argc, char **argv)
{
int * elems = malloc(SIZE * sizeof(int));
for (int j = 0; j < 1000; j++) {
for (int i = 0; i < SIZE; i++) {
elems[i] = rand();
}
myqsort(elems, 0, SIZE - 1);
}
return 0;
}
With the C version, run-time was approx 5.854 seconds.
With the assembly version, it was 5.829 seconds (i.e. slightly faster).
You can optimize the swapping of elements using only 1 additional register EDI and without the need for pushing and popping the pivot value in EDX:
mov edi, dword [esi + (0x4*eax)] ; edi = elems[i]
xchg dword [esi + (0x4*ecx)], edi ; elems[j] = edi, edi = elems[j]
mov dword [esi + (0x4*eax)], edi ; elems[i] = edi
The second swap can also be shortened:
mov edi, dword [esi + (0x4*ebx)] ; edi = elems[eid]
xchg dword [esi + (0x4*eax)], edi ; elems[i] = edi, edi = elems[i]
mov dword [esi + (0x4*ebx)], edi ; elems[eid] = edi
You can safely remove the mov esp, ebp from your epilog code because it is redundant. If those 3 pop's went well you already know that the stackpointer has the correct value.
qsort_done:
pop ebx
pop esi
pop edi
mov esp, ebp <-- This is useless!
pop ebp
ret
I am finishing up an assembly program that replaces characters in a string with a given replacement character. The assembly code calls C functions and the assembly program itself is called from main in my .c file. However, when trying to finish and return a final int value FROM the assembly program TO C, I get segfaults. My .asm file is as follows:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov al, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
push eax ; parameter for (*isinsubset)
mov edx, [ebp + 16]
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov eax, [ebp+12]
mov [ecx], al
mov edx, [ebp+16]
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
mov eax, 9
push eax ;test
ret
and my c file is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
In my assembly code, you can see in end
mov eax, 9
push eax
I am simply trying to return the value 9 to the value "r" which is an int in the C file. This is just a test to see if I can return an int back to r in the c file. Eventually I will be returning the number of characters that were replaced back to r. However, I need to figure out why the following code above is segfaulting. Any ideas?
mov eax, 9
push eax ; NOT a good idea
ret
That is a big mistake. It's going to return based on the lowest thing on the stack and you've just pushed something on to the stack that's almost certainly not a valid return address.
Most functions return a code by simply placing it into eax (this depends on calling convention of course but that's a pretty common one), there's generally no need to push it on to the stack, and certainly plenty of downside to doing so.
Return values are normally stored in EAX on X86 32 bit machines. So your pushing it on the stack after storing it in EAX is wrong, because the function it is returning to will try to use what is in EAX as a value for IP (instruction pointer)
Ret with no argument pops the return address off of the stack and jumps to it.
source
I want to know the corresponding c code of the following asm code, and if you can a explanation of the asm code
PUBLIC fonction2
_tab DW 0aH DUP (?)
_idx$ = 8 ; size =4
_value$ = 12 ; size =2
fonction2 PROC
push ebp
mov ebp, esp
mov eax, DWORD PTR _idx$[ebp]
mov cx, WORD PTR _value$[ebp]
mov word PTR _tab[eax*2], cx
pop ebp
ret 0
fonction2 ENDP
What's _idx$ and _value$.
Thanks for your help in advance.
void fonction2 (int idx, short value)
{
tab [idx] = value;
}
I need to translate what is commented within the method, to assembler. I have a roughly idea, but can't.
Anyone can help me please? Is for an Intel x32 architecture:
int
secuencia ( int n, EXPRESION * * o )
{
int a, i;
//--- Translate from here ...
for ( i = 0; i < n; i++ ){
a = evaluarExpresion( *o );
o++;
}
return a ;
//--- ... until here.
}
Translated code must be within __asm as:
__asm {
translated code
}
Thank you,
FINAL UPDATE:
This is the final version, working and commented, thanks to all for your help :)
int
secuencia ( int n, EXPRESION * * o )
{
int a = 0, i;
__asm
{
mov dword ptr [i],0 ; int i = 0
jmp salto1
ciclo1:
mov eax,dword ptr [i]
add eax,1 ; increment in 1 the value of i
mov dword ptr [i],eax ; i++
salto1:
mov eax,dword ptr [i]
cmp eax,dword ptr [n] ; Compare i and n
jge final ; If is greater goes to 'final'
mov eax,dword ptr [o]
mov ecx,dword ptr [eax] ; Recover * o (its value)
push ecx ; Make push of * o (At the stack, its value)
call evaluarExpresion ; call evaluarExpresion( * o )
add esp,4 ; Recover memory from the stack (4KB corresponding to the * o pointer)
mov dword ptr [a],eax ; Save the result of evaluarExpresion as the value of a
mov eax,dword ptr [o] ; extract the pointer to o
add eax,4 ; increment the pointer by a factor of 4 (next of the actual pointed by *o)
mov dword ptr [o],eax ; o++
jmp ciclo1 ; repeat
final: ; for's final
mov eax,dword ptr [a] ; return a - it save the return value at the eax registry (by convention this is where the result must be stored)
}
}
Essentially in assembly languages, strictly speaking there isn't a notion of a loop the same way there would be in a higher level language. It's all implemented with jumps (eg. as a "goto"...)
That said, x86 has some instructions with the assumption that you'll be writing "loops", implicitly using the register ECX as a loop counter.
Some examples:
mov ecx, 5 ; ecx = 5
.label:
; Loop body code goes here
; ECX will start out as 5, then 4, then 3, then 1...
loop .label ; if (--ecx) goto .label;
Or:
jecxz .loop_end ; if (!ecx) goto .loop_end;
.loop_start:
; Loop body goes here
loop .loop_start ; if (--ecx) goto .loop_start;
.loop_end:
And, if you don't like this loop instruction thing counting backwards... You can write something like:
xor ecx, ecx ; ecx = 0
.loop_start:
cmp ecx, 5 ; do (ecx-5) discarding result, then set FLAGS
jz .loop_end ; if (ecx-5) was zero (eg. ecx == 5), jump to .loop_end
; Loop body goes here.
inc ecx ; ecx++
jmp .loop_start
.loop_end:
This would be closer to the typical for (int i=0; i<5; ++i) { }
Note that
for (init; cond; advance) {
...
}
is essentially syntactic sugar for
init;
while(cond) {
...
advance;
}
which should be easy enough to translate into assembly language if you've been paying any attention in class.
Use gcc to generate the assembly code
gcc -S -c sample.c
man gcc is your friend
For that you would probably use the loop instruction that decrements the ecx (often called, extended counter) at each loop and goes out when ecx reaches zero.But why use inline asm for it anyway? I'm pretty sure something as simple as that will be optimized correctly by the compiler...
(We say x86 architecture, because it's based on 80x86 computers, but it's an "ok" mistake =p)