Why the following c function is same as the asm code

Why the following c function is same as the asm code - c

Can someone explain me why the following c code is equivalent to the asm code?
void arith1(){
a=(b*10)+(5/(-e));
}
Why do we put the value of b in a ecx register.
ASM code:
mov ecx, DWORD PTR _b
imul ecx, 10 ; $1 = b * 10
mov esi, DWORD PTR _e
neg esi ; $2 = -e
mov eax, 5
cdq
idiv esi ; $3 = 5 / -e
add ecx, eax ; $4 = $1 + $3
mov DWORD PTR _a, ecx ; a = $4

mov ecx, DWORD PTR _b
imul ecx, 10 ; edx:eax = b * 10
mov esi, DWORD PTR _e
neg esi ; esi = -e
mov eax, 5
cdq ; edx:eax = 5
idiv esi ; eax = 5 / -e
add ecx, eax ; ecx = b * 10 + 5 / -e
mov DWORD PTR _a, ecx ; store result to a
The only non-intuitive part is that the imul and idiv instructions combine the edx and eax registers to form a 64-bit value. The upper 32-bits are discarded after the imul instruction, C doesn't perform overflow checks. The cdq instruction (convert double to quad) is required to turn the 32-bit value into a 64-bit value before dividing.

mov ecx, DWORD PTR _b
Move the variable b into ecx register
imul ecx, 10 ; $1 = b * 10
Multiply the ecx register by 10. (b*10)
mov esi, DWORD PTR _e
Move the variable e into the esi register.
neg esi ; $2 = -e
Negate the esi register. (-e)
mov eax, 5
Move 5 into the eax register.
cdq
Convert eax into a quad-word (I think, not sure what it's needed for).
idiv esi ; $3 = 5 / -e
Divide eax by esi (5 / -e)
add ecx, eax ; $4 = $1 + $3
Add eax to ecx (b*10)+(5/-e).
mov DWORD PTR _a, ecx ; a = $4
Move ecx into variable a (a = (b*10)+(5/-e)).

Related

How to deal with non-fixed size arrays?

Today I'm working on arrays and addressing modes so I made this snippet to train myself. Everything is working fine (apparently) but I chose the easiest way using fixed size arrays. My question is about dynamic memory allocation. In my code, my arrays arrayand array2 have a fixed size (8 * 4(int)). If I wanted to extend them, or to speak with C concepts, make a realloc or something like depending on real time values for the size, may I have to implement system calls like sys_brk and mmapby myself in assembly
or is there an other option ? The other question deals with the part of my code used to reverse an array (label .reversing) I know they are faster solutions than mine so if you could give me some advices, it would be nice.
%define ARRAY_LEN 8
%macro print 2
mov eax, 4
mov ebx, 1
mov ecx, %1
mov edx, %2
int 0x80
%endmacro
%macro exit 0
xor ebx, ebx
mov eax, 1
int 0x80
%endmacro
section .bss
sumarrays resd 1
dynarray resd 256
section .rodata
highest equ 0x78
evensum equ 0x102
sumarray1 equ 0x1e6
sumarray2 equ 0x256
section .data
revarray times ARRAY_LEN dd 0
strok db "Good result !", 0xa, 0
lenstrok equ $ - strok
strerror db "Bad result !", 0xa, 0
lenerror equ $ - strerror
array dd 0x33, 0x21, 0x32, 0x78, 0x48, 0x77, 0x19, 0x10
array2 dd 0x12, 0x98, 0x65, 0x62, 0x4e, 0x3e, 0x1f, 0x3a
section .text
global sort
global sum
global _start
_start:
; sort array from highest value to lowest value
sub esp, 8
push ARRAY_LEN
push array
call sort
add esp, 8
and esp, 0xfffffff0
; copying array into some another ones : array -> revarray
mov esi, eax
mov edi, revarray
mov ecx, ARRAY_LEN
rep movsd
; now let's reverse the array
mov esi, revarray ;esi = address of array
mov ecx, ARRAY_LEN ;ecx = number of elements
lea edi, [esi + ecx * 4 - 4]
.reversing:
mov eax, [esi] ;eax = value at start
mov ebx, [edi] ;ebx = value at end
mov [edi], eax
mov [esi], ebx
add esi, 4 ;incrementing address of next item at start
sub edi, 4 ;incrementing address of next item at end
cmp esi, edi ;middle reached ?
jb .reversing ;no we continue
.reversed:
;after, we compute the sum of the elements in the array
mov esi, revarray ; address of array
mov edx, ARRAY_LEN ; num elem of array
sub esp, 8
call arraysum ; sum computation
add esp, 8
cmp eax, sumarray1 ; we check the result to be sure
jne .failure
; merging array2 with array
; esi = source address of first array
; edi = address of second array, the destination one
mov esi, array2
mov edi, array
sub esp, 8
call mergearrays
add esp, 8
; we compute the sum of the merged arrays into the new (old one in fact)
mov esi, array
mov ecx, ARRAY_LEN
add esp, 8
call arraysum
add esp, 8
cmp eax, 0x43c ; we check the result to be sure
jne .failure ; if not egal, exit
; compute the sum of even numbers only in the array
; set up edx to 0 at beginning of loop
; cmova edx, $value_even_number
mov esi, revarray
xor ebx, ebx
xor ecx, ecx
.iterate:
xor edx, edx ; setting up edx to 0
mov eax, [esi + ecx * 4]
test al, 1 ; CF == 0 ? so it's an even number
cmovz edx, eax ; we store the value of current element in edx
add ebx, edx ; so in case of odd number => add 0
inc ecx
cmp ecx, ARRAY_LEN ; end of array reached ?
jne .iterate ; no, we continue
cmp ebx, 0x102 ; check if result is good
je .success
jmp .failure
exit
.success:
print strok, lenstrok
exit
.failure:
print strerror, lenerror
exit
; ********************************************
; computes the sum of all elements of an array
; esi = address of the array
; edx = number of elements in array
; output : eax
; ********************************************
arraysum:
push ebx
xor ebx, ebx
xor ecx, ecx
.arraysum:
mov eax, [esi + ecx *4]
add ebx, eax
inc ecx
cmp ecx, edx
je .sumdone
jmp .arraysum
.sumdone:
mov eax, ebx
pop ebx
ret
; *********************************************
; procedure that sort array from highest to lowest value
; first arg = address of the array
; second arg = number of elements in a array
; return : eax, sorted array
; ********************************************
sort:
push ebp
mov ebp, esp
mov edx, [ebp + 12]
.loop1:
mov esi, [ebp + 8]
mov ecx, [ebp + 12]
.loop2:
mov eax, [esi]
mov ebx, [esi + 4]
cmp eax, ebx
jg .skip
mov [esi], ebx
mov [esi + 4], eax
.skip:
add esi, 4 ;perform loop checks
dec ecx
cmp ecx, 1
ja .loop2
dec edx
ja .loop1
mov eax,[ebp + 8] ; save result in eax
mov esp,ebp
pop ebp
ret
; ****************************************************
; function adding two arrays merging them
; esi = address of first array
; edi = address of second array (destination array)
; ****************************************************
mergearrays:
xor ecx, ecx
.merging:
mov eax, [esi + ecx * 4]
xadd [edi + ecx * 4], eax
inc ecx
cmp ecx, ARRAY_LEN
je .mergedone
jmp .merging
.mergedone:
ret

cmp instruction not making jge jump within InsertionSort

I am trying to write an iterative version of InsertionSort using MASM. After repeatedly getting unexpected errors, I tried running through the code line by line and watching in the debugger that it did what I expected. Sure enough, I the cmp instruction within my 'while loop' doesn't seem to be jumping every time it's supposed to:
; store tmp in edx
movzx edx, word ptr[ebx + esi * 2]; edx: tmp = A[i]
...
//blah blah
while_loop :
...
movzx eax, word ptr[ebx + 2 * edi]
cmp dx, ax
jge exit_while
For example, if I use the data given, after the first 'for loop' iteration, I get to the point where EDX=0000ABAB, and EAX=00003333. I reach the lines:
cmp dx, ax
jge exit_while
Since ABAB>3333, I'd expect it to jump to exit_while, yet it doesn't!
What is going on here??? I'm at a total loss.
.data
arr word 3333h, 1111h, 0ABABh, 1999h, 25Abh, 8649h, 0DEh, 99h
sizeArr dword lengthof arr
printmsg byte "The array is: [ ", 0
comma byte ", ", 0
endmsg byte " ]", 0
.code
main proc
call printArr
call crlf
push sizeArr
push offset arr
call insertionsort
call crlf
call printArr
call crlf
call exitprocess
main endp
insertionsort proc
push ebp
mov ebp, esp
_arr = 8
len = _arr + 4
mov ebx, [ebp + _arr]
mov ecx, [ebp + len]
dec ecx
mov esi, 1; store i in esi, with i=1 initially
; for (i = 1; i < SIZE; i++)
forloop:
; store tmp in edx
movzx edx, word ptr[ebx + esi * 2]; edx: tmp = A[i]
; store j in edi, where in initially j = i
mov edi, esi
;set j=i-1
dec edi
;while (j >= 0 && tmp<arr[j])
while_loop :
cmp edi, 0
jl exit_while
movzx eax, word ptr[ebx + 2 * edi]
; cmp dx, [ebx + 2 * edi]
cmp dx, ax
jge exit_while
; A[j] = A[j-1]
push word ptr [ebx+2*edi]
pop word ptr [ebx+2*edi+2]
; j = j - 1
dec edi
jmp while_loop
exit_while:
push dx
pop word ptr[ebx + 2*edi+2]
; mov[ebx + edi], dx; A[j] = tmp
inc esi; i = i + 1
loop forloop
finished:
mov esp, ebp
pop ebp
ret 8
insertionsort endp
printArr proc
push ebp
mov ebp, esp
mov ebx, offset sizeArr
mov ecx, [ebx]
mov esi, offset arr
mov edx, offset printmsg
call writestring
mov edx, offset comma
loop1 :
movzx eax, word ptr [esi]
call writeHex
call writestring
add esi, 2
loop loop1
mov edx, offset endmsg
call writestring
mov esp, ebp
pop ebp
ret
printArr endp

jge is the version for signed values - as such, a word with the value ABAB is negative - hence the comparison result you see.
Try jae (jump if above or equal) instead - the unsigned equivalent.

Wrong Visual Studio assembly output?

I wrote this classic function : (in 32-bit mode)
void ex(size_t a, size_t b)
{
size_t c;
c = a;
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of calling the value stored in the stack? I don't understand...

The debugger doesn't show the instruction using ebp to access a. The same syntax is permitted when you write inline assembly. Otherwise the reason that dword ptr still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".

Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
mov eax, DWORD PTR _a$[ebp]
mov DWORD PTR _c$[ebp], eax
mov eax, DWORD PTR _b$[ebp]
mov DWORD PTR _a$[ebp], eax
mov eax, DWORD PTR _c$[ebp]
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Note this doesn't work, you need to use pointers for the swap to work.
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa;
*pa = *pb;
*pb = c;
}
which gets translated into:
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
mov eax, DWORD PTR _pb$[ebp]
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP

Traversing a 2-d array

I'm having trouble understanding how to traverse a 2-d array in x86 assembly language. I am missing a bit of understanding. This is what I have so far.
The issue is the lines with //offset and //moving through array
For the //offset line the error I am getting is "non constant expression in second operand"
and also
"ebx: illegal register in second operand"
For the next line I get the error
"edx: illegal register in second operand"
mov esi, dim
mul esi
mov eax, 0 // row index
mov ebx, 0 // column index
mov ecx, dim
StartFor:
cmp ecx, esi
jge EndFor
lea edi, image;
mov edx, [eax *dim + ebx] // offset
mov dl, byte ptr [edi + esi*edx] // moving through array
mov edx, edi
and edx, 0x80
cmp edx, 0x00
jne OverThreshold
mov edx, 0xFF
OverThreshold:
mov edx, 0x0

See x86 tag wiki, including the list of addressing modes.
You can scale an index register by a constant, but you can't multiply two registers in an addressing mode. You'll have to do that yourself (e.g. with imul edx, esi, if the number of columns isn't a compile time constant. If it's a power-of-2 constant, you can shift, or even use a scaled addressing mode like [reg + reg*8]).
re: edit: *dim should work if dim is defined with something like dim equ 8. If it's a memory location holding a value, then of course it's not going to work. The scale factor can be 1, 2, 4, or 8. (The machine code format has room for a 2-bit shift count, which is why the options are limited.)
I'd also recommend loading with movzx to zero-extend a byte into edx, instead of only writing dl (the low byte). Actually nvm, your code doesn't need that. In fact, you overwrite the value you loaded with edi. I assume that's a bug.
You can replace
imul edx, esi
mov dl, byte ptr [edi + edx] ; note the different addressing mode
mov edx, edi ; bug? throw away the value you just loaded
and edx, 0x80 ; AND already sets flags according to the result
cmp edx, 0x00 ; so this is redundant
jne OverThreshold
with
imul edx, esi
test 0x80, byte ptr [edi + edx] ; AND, discard the result and set flags.
jnz
Of course, instead of multiplying inside the inner loop, you can just add the columns in the outer loop. This is called Strength Reduction. So you do p+=1 along each row, and p+=cols to go from row to row. Or if you don't need to care about rows and columns, you can just iterate over the flat memory of the 2D array.

A 2-dimensional array is just an interpretation of a sequence of bytes. You'll have to choose in which order to store the items. For example, you might choose "row-major order".
I've written a demo where a buffer is filled with a sequence of numbers. Then the sequence is interpreted as single- and two-dimensional array.
tx86.s
%define ITEM_SIZE 4
extern printf
section .bss
cols: equ 3
rows: equ 4
buf: resd cols * rows
c: resd 1
r: resd 1
section .data
fmt: db "%-4d", 0 ; fmt string, '0'
fmt_nl: db 10, 0 ; "\n", '0'
section .text ; Code section.
global main
main:
push ebp
mov ebp, esp
; fill buf
mov ecx, cols * rows - 1
mov [buf + ITEM_SIZE], ecx
.fill_buf:
mov [buf + ecx * ITEM_SIZE], ecx
dec ecx
jnz .fill_buf
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; buf as 1-dimensional array
; buf[c] = [buf + c * ITEM_SIZE]
xor ecx, ecx
mov [c], ecx
.lp1d:
mov ecx, [c]
push dword [buf + ecx * ITEM_SIZE]
push dword fmt
call printf
mov ecx, [c]
inc ecx
mov [c], ecx
cmp ecx, cols * rows
jl .lp1d
; print new line
push dword fmt_nl
call printf
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; buf as 2-dimensional array
; buf[r][c] = [buf + (r * cols + c) * ITEM_SIZE]
xor ecx, ecx
mov [r], ecx
.lp1:
xor ecx, ecx
mov [c], ecx
.lp2:
; calculate address
mov eax, [r]
mov edx, cols
mul edx ; eax = r * cols
add eax, [c] ; eax = r * cols + c
; print buf[r][c]
push dword [buf + eax * ITEM_SIZE]
push dword fmt
call printf
; next column
mov ecx, [c]
inc ecx
mov [c], ecx
cmp ecx, cols
jl .lp2
; print new line
push dword fmt_nl
call printf
; next row
mov ecx, [r]
inc ecx
mov [r], ecx
cmp ecx, rows
jl .lp1
mov esp, ebp
pop ebp ; restore stack
xor eax, eax ; normal exit code
ret
Buidling(on Linux)
nasm -f elf32 -l tx86.lst tx86.s
gcc -Wall -g -O0 -m32 -o tx86 tx86.o
Running
./tx86
0 1 2 3 4 5 6 7 8 9 10 11
0 1 2
3 4 5
6 7 8
9 10 11

Understanding Intel x86 assembly output for a modulo calculation

I have a school project about compilers and how it differs in assembly code between Intel x86 and ARMv7, but i'm stuck trying to comprehend the assembly for the Intel x86 architecture.
The source code is:
int main()
{
int a=5,b=2;
int result;
result = a % b;
printf("Result of 5 modulo 2 is %i\n", result);
}
Assembly output (gcc masm=Intel)
main:
/*
Intel32-x86 Arhchitecture
Little endian
ebp register -- base pointer
esp register -- stack pointer
*/
push ebp ; ebp register put on stack
mov ebp, esp ; Move data from ebp to esp
and esp, -16 ; Logical AND ??
sub esp, 32 ; Subtraction ??
mov DWORD PTR [esp+20], 5
;5 as 32 bits
;00000101-00000000-00000000-00000000
mov DWORD PTR [esp+24], 2
;2 as 32 bits
;00000010-00000000-00000000-00000000
mov eax, DWORD PTR [esp+20]
mov edx, eax
sar edx, 31
;Shift Arithmetically right - edx med 31.
;00000101-00000000-00000000-00000000 BEFORE
;00000000-00000000-00000000-00000000 AFTER
idiv DWORD PTR [esp+24]
;Signed divide - IDIV r/m32 - EDX:EAX register
;Dividing EDX:EAX on value of esp+24, and save the remainder in edx.
;EDX:EAX 00000000-00000000-00000000-00000000-00000101-00000000-00000000-00000000
mov DWORD PTR [esp+28], edx
mov eax, OFFSET FLAT:.LC0
mov edx, DWORD PTR [esp+28]
mov DWORD PTR [esp+4], edx
mov DWORD PTR [esp], eax
call printf
leave
ret
and esp, -16 ; Logical AND
sub esp, 32 ; Subtraction
What is the purpose of those two instructions?

The purpose is mentioned in the comments:
and esp,-16 ;round esp down to 16 byte boundary
sub esp,32 ;allocate 32 bytes of space for local variables
In case you didn't catch this part about sign extending the dividend:
mov eax, DWORD PTR [esp+20] ; eax = dividend
mov edx, eax ; edx = dividend
sar edx, 31 ; edx = 0 or -1 (the sign extension)

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight