float to double (IEEE754) conversion - c

I'm trying to convert a 32-bit float to a 64-bit double in asm on the x86 architecture. The conversion is done by a function written in asm, which I then want to call from C. I have no idea what I'm doing wrong, but the memory pointed to by dst seems to stay untouched, and after printf the program crashes. I want to do it without any floating-point instructions. Here's the code:
.686
.model flat
public _conv
.data
mantissa_mask dd 00000000011111111111111111111111b
exponent_mask dd 01111111100000000000000000000000b
.code
; void conv(float *src, double *dst) -- prototype used by the C caller.
; Intended to build the IEEE 754 double bit pattern at *dst from the float
; bit pattern at *src using integer instructions only.
_conv PROC
pusha
mov ebp, esp
; NOTE(review): ebp is captured AFTER pusha. pusha stores eight dwords
; (32 bytes), so from this ebp the return address is at [ebp+32] and the two
; arguments are at [ebp+36] and [ebp+40]. [ebp+8] and [ebp+12] are the saved
; EBP and saved ESP slots, so the two loads below do not fetch src/dst.
mov esi, dword ptr [ebp+8] ; src
mov edi, dword ptr [ebp+12]; dst
; clear both halves of the destination
mov dword ptr [edi], 0
mov dword ptr [edi+4], 0
; fraction: isolate the 23 fraction bits and move them up by 29 bit positions
; (52 - 23) across the edx:eax pair
mov eax, dword ptr [esi]
and eax, dword ptr mantissa_mask
mov dword ptr [edi], eax
xor edx, edx ; zero edx (mul overwrites edx anyway)
mov ecx, 1
shl ecx, 29 ;ecx == 2^29
mul ecx ;so it's like `shl edx:eax, 29`
mov dword ptr [edi], eax
mov dword ptr [edi+4], edx
; exponent: extract, rebias from 127 to 1023, position at bit 20
mov eax, dword ptr [esi]
and eax, dword ptr exponent_mask
shr eax, 23 ;put exponent on lowest bits
sub eax, 127 ;exponent in float is coded enlarged by 127
add eax, 1023 ;in double it's enlarged by 1023
shl eax, 20 ;exponent in double starts at bit 20 of the 2nd (high) dword
; NOTE(review): the value was positioned for the HIGH dword, but this ORs it
; into the low dword [edi]; the high dword is [edi+4].
or dword ptr [edi], eax
;sign bit: copy bit 31 of the float into bit 31 of the high dword
bt dword ptr [esi], 31
jc set_sign_bit
btr dword ptr [edi+4], 31
jmp endthis
set_sign_bit:
bts dword ptr [edi+4], 31
endthis:
popa
ret
_conv ENDP
END
And the C code:
#include <stdio.h> /* printf: a variadic function must be declared before it is called */

/* Implemented in assembly as _conv: stores the double equivalent of *src in *dst. */
void conv(float * src, double * dst);

/* Converts one sample value through conv() and prints the result. */
int main()
{
    float src = 4.5f;
    double dst = 0.;
    conv(&src, &dst);
    printf("%f\n", dst);
    return 0;
}

Your primary problem is accessing the arguments. Since you did pusha the arguments are not at [ebp+8] and [ebp+12], rather at [ebp+36] and [ebp+40]. A debugger would have shown you this right away. Even with those changes your code is still broken though.

Ok, finally it works. Very helpful was Jester's advice about args access. Stupid thing, but hard to notice. Here's final code:
.686
.model flat
public _conv
.data
mantissa_mask dd 00000000011111111111111111111111b ; float fraction field, bits 0..22
exponent_mask dd 01111111100000000000000000000000b ; float exponent field, bits 23..30
.code
;-----------------------------------------------------------------------
; void conv(float *src, double *dst)
; ABI:   32-bit cdecl (arguments on the stack, caller cleans up)
; In:    [ebp+8]  = src, pointer to an IEEE 754 single
;        [ebp+12] = dst, pointer to 8 bytes that receive the IEEE 754 double
; Out:   *dst written; nothing returned
; Clobb: eax, ecx, edx, flags (ebx, esi, edi, ebp are saved and restored)
;
; Integer-only widening. Every float is exactly representable as a double,
; so no rounding is needed; only the field layout changes:
;   sign      bit 31       -> bit 63 (bit 31 of the high dword)
;   exponent  8 bits       -> 11 bits, bias 127 -> 1023
;   fraction  23 bits      -> top 23 of 52 bits (shifted left by 29)
; Special exponents are handled explicitly:
;   exp = 0,   frac = 0    -> signed zero
;   exp = 0,   frac != 0   -> subnormal float, renormalised (normal as a double)
;   exp = 255              -> Inf / NaN, exponent becomes 2047, payload kept
;-----------------------------------------------------------------------
_conv PROC
    push ebp
    mov ebp, esp
    push ebx
    push esi
    push edi

    mov esi, dword ptr [ebp+8]          ; src
    mov edi, dword ptr [ebp+12]         ; dst

    mov eax, dword ptr [esi]            ; eax = raw float bits
    mov ebx, eax
    and ebx, 80000000h                  ; ebx = sign, already at bit 31
    mov ecx, eax
    and ecx, dword ptr exponent_mask
    shr ecx, 23                         ; ecx = biased float exponent, 0..255
    and eax, dword ptr mantissa_mask    ; eax = 23-bit fraction

    cmp ecx, 255
    je inf_or_nan
    test ecx, ecx
    jz zero_or_subnormal
    add ecx, 1023 - 127                 ; normal number: just rebias
    jmp pack

inf_or_nan:
    mov ecx, 2047                       ; all-ones exponent in the double format
    jmp pack

zero_or_subnormal:
    test eax, eax
    jz pack                             ; +/-0: exponent and fraction both stay 0
    ; Subnormal: value = frac * 2^-149 with no implicit leading 1.
    ; Let k be the index of the highest set fraction bit; then
    ; value = 1.xxx * 2^(k-149), which is a normal double.
    bsr edx, eax                        ; edx = k (0..22)
    mov ecx, 23
    sub ecx, edx                        ; shift that brings bit k up to bit 23
    shl eax, cl
    and eax, dword ptr mantissa_mask    ; drop the now-implicit leading 1
    lea ecx, [edx + 1023 - 149]         ; biased double exponent

pack:
    ; eax = 23-bit fraction, ecx = 11-bit exponent, ebx = sign bit
    mov edx, eax
    shl eax, 29                         ; fraction bits 0..2  -> low dword bits 29..31
    shr edx, 3                          ; fraction bits 3..22 -> high dword bits 0..19
    shl ecx, 20                         ; exponent -> high dword bits 20..30
    or edx, ecx
    or edx, ebx
    mov dword ptr [edi], eax
    mov dword ptr [edi+4], edx

    pop edi
    pop esi
    pop ebx
    pop ebp
    ret
_conv ENDP
END

Related

Struct or matrix for a register?

I want to have a register containing 4 bytes of address and 4 bytes of data. For that, I thought about building it in an array of structures (containing address and data as members) or in a matrix. Here a sample code to test what I want to achieve:
#include <stdio.h>
#include <stdint.h>
void reg_init();
#define REG_SIZE 1
typedef struct Reg{
uint8_t addr[4];
uint8_t data[4];
} reg;
static reg reg_struct[REG_SIZE];
static uint8_t reg_matrix[REG_SIZE][8];
/* Looks up a fixed 4-byte address in both register tables (the array of
   structs and the byte matrix) and prints the index of the matching entry. */
int main()
{
int index=-1; /* stays -1 if no entry matches */
reg_init();
for(int i=0; i<REG_SIZE; i++)
{
uint8_t addr_to_check[4]={0x12,0x34,0x56,0x78};
// FOR STRUCT
/* Byte-wise compare against entry i; index is set only when all 4 bytes match. */
for(int j=0; j<4; j++)
{
if(addr_to_check[j]!=reg_struct[i].addr[j]) break;
if(j==3) index = i;
}
//FOR MATRIX
/* Same compare against the first 4 bytes of row i; writes the same index variable. */
for(int j=0; j<4; j++)
{
if(addr_to_check[j]!=reg_matrix[i][j]) break;
if(j==3) index = i;
}
}
if (index<0) printf("Address not found\n");
else printf("Address at index: %i",index);
return 0;
}
/* Fills entry 0 of both tables with the same test pattern:
   address bytes 12 34 56 78 followed by data bytes 01 02 03 04. */
void reg_init()
{
// Register init for testing
/* struct form: address and data are separate 4-byte members */
reg_struct[0].addr[0] = 0x12;
reg_struct[0].addr[1] = 0x34;
reg_struct[0].addr[2] = 0x56;
reg_struct[0].addr[3] = 0x78;
reg_struct[0].data[0] = 0x01;
reg_struct[0].data[1] = 0x02;
reg_struct[0].data[2] = 0x03;
reg_struct[0].data[3] = 0x04;
/* matrix form: columns 0..3 hold the address, columns 4..7 the data */
reg_matrix[0][0] = 0x12;
reg_matrix[0][1] = 0x34;
reg_matrix[0][2] = 0x56;
reg_matrix[0][3] = 0x78;
reg_matrix[0][4] = 0x01;
reg_matrix[0][5] = 0x02;
reg_matrix[0][6] = 0x03;
reg_matrix[0][7] = 0x04;
}
The example shows just a unit size register, but the size will be much higher (up to 8 bytes). Overall, I'm interested in optimization in terms of performance. Does it really matter which one I use, or will the compiler generate the same machine code for both?
Below is the assembly of the above code created using visual studio 2019.
Look at the listing entries marked ; Line 50 and ; Line 51: it looks like the compiler has created the same assembly code for both the matrix and the structure.
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.20.27508.1
TITLE D:\main.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC ___local_stdio_printf_options
PUBLIC __vfprintf_l
PUBLIC _printf
PUBLIC _reg_init
PUBLIC _main
EXTRN ___acrt_iob_func:PROC
EXTRN ___stdio_common_vfprintf:PROC
_DATA SEGMENT
COMM ?_OptionsStorage#?1??__local_stdio_printf_options##9#9:QWORD ; `__local_stdio_printf_options'::`2'::_OptionsStorage
_DATA ENDS
_BSS SEGMENT
_reg_struct DQ 01H DUP (?)
_reg_matrix DB 08H DUP (?)
_BSS ENDS
_DATA SEGMENT
$SG8132 DB 'Address not found', 0aH, 00H
ORG $+1
$SG8133 DB 'Address at index: %i', 00H
_DATA ENDS
; Function compile flags: /Odtp
_TEXT SEGMENT
_index$ = -20 ; size = 4
_addr_to_check$1 = -16 ; size = 4
_j$2 = -12 ; size = 4
_j$3 = -8 ; size = 4
_i$4 = -4 ; size = 4
; main() as compiled without optimisation (/Od): every local lives in the
; ebp-relative slots named by the _index$, _i$4, _j$3, _j$2 and
; _addr_to_check$1 equates and is reloaded from the stack on each use.
; Local jump labels use the compiler's $LNn@main form.
_main PROC
; File D:\main.c
; Line 16
push ebp
mov ebp, esp
sub esp, 20 ; 00000014H
; Line 17
mov DWORD PTR _index$[ebp], -1
; Line 18
call _reg_init
; Line 19
; outer loop over i: init, then increment ($LN2) and test ($LN4)
mov DWORD PTR _i$4[ebp], 0
jmp SHORT $LN4@main
$LN2@main:
mov eax, DWORD PTR _i$4[ebp]
add eax, 1
mov DWORD PTR _i$4[ebp], eax
$LN4@main:
cmp DWORD PTR _i$4[ebp], 1
jge $LN3@main
; Line 21
mov BYTE PTR _addr_to_check$1[ebp], 18 ; 00000012H
mov BYTE PTR _addr_to_check$1[ebp+1], 52 ; 00000034H
mov BYTE PTR _addr_to_check$1[ebp+2], 86 ; 00000056H
mov BYTE PTR _addr_to_check$1[ebp+3], 120 ; 00000078H
; Line 23
; first inner loop over j: compare against _reg_struct
mov DWORD PTR _j$3[ebp], 0
jmp SHORT $LN7@main
$LN5@main:
mov ecx, DWORD PTR _j$3[ebp]
add ecx, 1
mov DWORD PTR _j$3[ebp], ecx
$LN7@main:
cmp DWORD PTR _j$3[ebp], 4
jge SHORT $LN6@main
; Line 25
mov edx, DWORD PTR _j$3[ebp]
movzx eax, BYTE PTR _addr_to_check$1[ebp+edx]
mov ecx, DWORD PTR _j$3[ebp]
mov edx, DWORD PTR _i$4[ebp]
movzx ecx, BYTE PTR _reg_struct[ecx+edx*8] ; byte j of 8-byte entry i
cmp eax, ecx
je SHORT $LN11@main
jmp SHORT $LN6@main
$LN11@main:
; Line 26
cmp DWORD PTR _j$3[ebp], 3
jne SHORT $LN12@main
mov edx, DWORD PTR _i$4[ebp]
mov DWORD PTR _index$[ebp], edx
$LN12@main:
; Line 27
jmp SHORT $LN5@main
$LN6@main:
; Line 30
; second inner loop over j: compare against _reg_matrix
mov DWORD PTR _j$2[ebp], 0
jmp SHORT $LN10@main
$LN8@main:
mov eax, DWORD PTR _j$2[ebp]
add eax, 1
mov DWORD PTR _j$2[ebp], eax
$LN10@main:
cmp DWORD PTR _j$2[ebp], 4
jge SHORT $LN9@main
; Line 32
mov ecx, DWORD PTR _j$2[ebp]
movzx edx, BYTE PTR _addr_to_check$1[ebp+ecx]
mov eax, DWORD PTR _j$2[ebp]
mov ecx, DWORD PTR _i$4[ebp]
movzx eax, BYTE PTR _reg_matrix[eax+ecx*8] ; same addressing form as the struct access
cmp edx, eax
je SHORT $LN13@main
jmp SHORT $LN9@main
$LN13@main:
; Line 33
cmp DWORD PTR _j$2[ebp], 3
jne SHORT $LN14@main
mov ecx, DWORD PTR _i$4[ebp]
mov DWORD PTR _index$[ebp], ecx
$LN14@main:
; Line 34
jmp SHORT $LN8@main
$LN9@main:
; Line 35
jmp $LN2@main
$LN3@main:
; Line 36
cmp DWORD PTR _index$[ebp], 0
jge SHORT $LN15@main
push OFFSET $SG8132
call _printf
add esp, 4
jmp SHORT $LN16@main
$LN15@main:
; Line 37
mov edx, DWORD PTR _index$[ebp]
push edx
push OFFSET $SG8133
call _printf
add esp, 8
$LN16@main:
; Line 38
xor eax, eax
; Line 39
mov esp, ebp
pop ebp
ret 0
_main ENDP
_TEXT ENDS
; Function compile flags: /Odtp
_TEXT SEGMENT
_reg_init PROC
; File D:\main.c
; Line 41
push ebp
mov ebp, esp
; Line 43
mov eax, 8
imul ecx, eax, 0
mov edx, 1
imul eax, edx, 0
mov BYTE PTR _reg_struct[ecx+eax], 18 ; 00000012H
; Line 44
mov ecx, 8
imul edx, ecx, 0
mov eax, 1
shl eax, 0
mov BYTE PTR _reg_struct[edx+eax], 52 ; 00000034H
; Line 45
mov ecx, 8
imul edx, ecx, 0
mov eax, 1
shl eax, 1
mov BYTE PTR _reg_struct[edx+eax], 86 ; 00000056H
; Line 46
mov ecx, 8
imul edx, ecx, 0
mov eax, 1
imul ecx, eax, 3
mov BYTE PTR _reg_struct[edx+ecx], 120 ; 00000078H
; Line 47
mov edx, 8
imul eax, edx, 0
mov ecx, 1
imul edx, ecx, 0
mov BYTE PTR _reg_struct[eax+edx+4], 1
; Line 48
mov eax, 8
imul ecx, eax, 0
mov edx, 1
shl edx, 0
mov BYTE PTR _reg_struct[ecx+edx+4], 2
; Line 49
mov eax, 8
imul ecx, eax, 0
mov edx, 1
shl edx, 1
mov BYTE PTR _reg_struct[ecx+edx+4], 3
; Line 50
mov eax, 8
imul ecx, eax, 0
mov edx, 1
imul eax, edx, 3
mov BYTE PTR _reg_struct[ecx+eax+4], 4
; Line 51
mov ecx, 8
imul edx, ecx, 0
mov eax, 1
imul ecx, eax, 0
mov BYTE PTR _reg_matrix[edx+ecx], 18 ; 00000012H
; Line 52
mov edx, 8
imul eax, edx, 0
mov ecx, 1
shl ecx, 0
mov BYTE PTR _reg_matrix[eax+ecx], 52 ; 00000034H
; Line 53
mov edx, 8
imul eax, edx, 0
mov ecx, 1
shl ecx, 1
mov BYTE PTR _reg_matrix[eax+ecx], 86 ; 00000056H
; Line 54
mov edx, 8
imul eax, edx, 0
mov ecx, 1
imul edx, ecx, 3
mov BYTE PTR _reg_matrix[eax+edx], 120 ; 00000078H
; Line 55
mov eax, 8
imul ecx, eax, 0
mov edx, 1
shl edx, 2
mov BYTE PTR _reg_matrix[ecx+edx], 1
; Line 56
mov eax, 8
imul ecx, eax, 0
mov edx, 1
imul eax, edx, 5
mov BYTE PTR _reg_matrix[ecx+eax], 2
; Line 57
mov ecx, 8
imul edx, ecx, 0
mov eax, 1
imul ecx, eax, 6
mov BYTE PTR _reg_matrix[edx+ecx], 3
; Line 58
mov edx, 8
imul eax, edx, 0
mov ecx, 1
imul edx, ecx, 7
mov BYTE PTR _reg_matrix[eax+edx], 4
; Line 59
pop ebp
ret 0
_reg_init ENDP
_TEXT ENDS
; Function compile flags: /Odtp
; COMDAT _printf
_TEXT SEGMENT
__Result$ = -8 ; size = 4
__ArgList$ = -4 ; size = 4
__Format$ = 8 ; size = 4
_printf PROC ; COMDAT
; File C:\Program Files (x86)\Windows Kits\10\include\10.0.17763.0\ucrt\stdio.h
; Line 954
push ebp
mov ebp, esp
sub esp, 8
; Line 957
lea eax, DWORD PTR __Format$[ebp+4]
mov DWORD PTR __ArgList$[ebp], eax
; Line 958
mov ecx, DWORD PTR __ArgList$[ebp]
push ecx
push 0
mov edx, DWORD PTR __Format$[ebp]
push edx
push 1
call ___acrt_iob_func
add esp, 4
push eax
call __vfprintf_l
add esp, 16 ; 00000010H
mov DWORD PTR __Result$[ebp], eax
; Line 959
mov DWORD PTR __ArgList$[ebp], 0
; Line 960
mov eax, DWORD PTR __Result$[ebp]
; Line 961
mov esp, ebp
pop ebp
ret 0
_printf ENDP
_TEXT ENDS
; Function compile flags: /Odtp
; COMDAT __vfprintf_l
_TEXT SEGMENT
__Stream$ = 8 ; size = 4
__Format$ = 12 ; size = 4
__Locale$ = 16 ; size = 4
__ArgList$ = 20 ; size = 4
__vfprintf_l PROC ; COMDAT
; File C:\Program Files (x86)\Windows Kits\10\include\10.0.17763.0\ucrt\stdio.h
; Line 642
push ebp
mov ebp, esp
; Line 643
mov eax, DWORD PTR __ArgList$[ebp]
push eax
mov ecx, DWORD PTR __Locale$[ebp]
push ecx
mov edx, DWORD PTR __Format$[ebp]
push edx
mov eax, DWORD PTR __Stream$[ebp]
push eax
call ___local_stdio_printf_options
mov ecx, DWORD PTR [eax+4]
push ecx
mov edx, DWORD PTR [eax]
push edx
call ___stdio_common_vfprintf
add esp, 24 ; 00000018H
; Line 644
pop ebp
ret 0
__vfprintf_l ENDP
_TEXT ENDS
; Function compile flags: /Odtp
; COMDAT ___local_stdio_printf_options
_TEXT SEGMENT
___local_stdio_printf_options PROC ; COMDAT
; File C:\Program Files (x86)\Windows Kits\10\include\10.0.17763.0\ucrt\corecrt_stdio_config.h
; Line 86
push ebp
mov ebp, esp
; Line 88
mov eax, OFFSET ?_OptionsStorage#?1??__local_stdio_printf_options##9#9 ; `__local_stdio_printf_options'::`2'::_OptionsStorage
; Line 89
pop ebp
ret 0
___local_stdio_printf_options ENDP
_TEXT ENDS
END

Assembly x86-32 and some c functions

I never learned C, so this confuses me. I would just like to know whether I did this correctly and where I need to improve. The code is 32-bit x86 assembly. Thanks.
This is what I supposed to do:
Write a procedure with the signature
char *strchar(char *s1, char c1)
that returns a pointer to the first occurrence of the character c1 within the string s1 or, if not found, returns a null.
This is what I came out with:
; char *strchar(char *s1, char c1)
; Scans s1 for c1, keeping the cursor at [ebp-24] and the character at [ebp-28].
strchar (char*, char):
push ebp
mov ebp, esp
; NOTE(review): nothing is subtracted from esp after this point, so the
; [ebp-24] and [ebp-28] slots used below lie beneath the stack pointer.
; NOTE(review): the arguments are read from edi and esi, not from the stack.
mov dword ptr [ebp-24], edi
mov EAX , esi
mov BYTE PTR [ebp-28], al
.L5:
; loop head: stop when the current byte is the terminating 0
mov EAX , dword ptr [ebp-24]
movzx EAX , byte ptr [ EAX ]
test AL, AL
je .L2
; compare the current byte with c1
mov EAX , dword PTR [ebp-24]
movzx EAX , BYTE PTR [ EAX ]
cmp BYTE PTR [ebp-28], al
jne .L3
mov eax, dword PTR [ebp-24] ; match: return the cursor
jmp .L6
.L3:
add dword PTR [ebp-24], 1 ; advance to the next character
jmp .L5
.L2:
; NOTE(review): reached only at the terminator, i.e. "not found". This path
; returns the address ebp-9 (a stack address), not 0.
LEA eax, [ebp-9]
MOV DWORD PTR [EBP-8], eax
MOV EAX, DWORD PTR [ebp-8]
.L6:
POP EBP
RET
The lines:
mov dword ptr [ebp-24], edi
mov EAX , esi
mov BYTE PTR [ebp-28], al
assume that a stack frame has been allocated for this function which doesn’t appear true; I think you should have something like:
sub esp, 32
after the
mov ebp,esp
Also, the three lines after L2 seem confused. The only way to get to L2 is if the nil (0) byte is discovered in the string, at which point, the code should return a NULL pointer.
The exit path in the code (L6) leaves eax alone, so all that should be needed is:
L2:
mov eax, 0
It might make debugging easier if you kept the alias up to date; so:
L2:
mov eax, 0
mov [ebp-24], eax
Also, the calling convention used here is a bit odd: the string is passed in edi and the character in esi. Normally, in x86-32, these would both be passed on the stack. This looks like it might have been x86-64 code, converted to x86-32....
A final note; this assembly code looks like the output of a compiler with optimisations disabled. Often, generating the assembly with the optimisations enabled generates easier to understand code. This code, for example, could be much more concisely written as below, without even devolving into weird intel ops:
; char *strchar(char *s1, char c1)
; In:    edi = s1, esi = c1 (low byte) -- the register convention of the
;        code this snippet rewrites
; Out:   eax = address of the first byte equal to c1, or 0 if the
;        terminating 0 is reached first
; Clobb: edx, flags
strchar:
        mov     eax, edi                ; eax = cursor into s1
        mov     edx, esi                ; dl  = c1
scan:
        mov     dh, [eax]               ; dh = current character
        cmp     dh, 0
        je      not_found               ; hit the terminator
        cmp     dl, dh
        je      found
        add     eax, 1
        jmp     scan
not_found:
        xor     eax, eax                ; return NULL
found:
        ret
Here is one with stack overhead
[global strchar]
;-----------------------------------------------------------------------
; char *strchar(char *s1, char c1)      (NASM, 32-bit cdecl, with frame)
; In:    [ebp+8] = s1, [ebp+12] = c1
; Out:   eax = pointer to the first byte equal to c1, or 0 if the
;        terminating 0 is reached first
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
strchar:
        push    ebp
        mov     ebp, esp
        mov     ecx, [ebp + 8]          ; ecx = cursor into s1
        mov     dl, [ebp + 12]          ; dl  = c1
.scan:
        movzx   eax, byte [ecx]         ; eax = current char, upper bits zero
        test    al, al
        je      .done                   ; terminator: eax is already 0 (NULL)
        cmp     dl, al
        je      .hit
        inc     ecx
        jmp     .scan
.hit:
        mov     eax, ecx                ; return the cursor
.done:
        pop     ebp                     ; esp was never moved, so this undoes the frame
        ret
Here is one without stack overhead
[global strchar]
;-----------------------------------------------------------------------
; char *strchar(char *s1, char c1)      (NASM, 32-bit cdecl, frameless)
; In:    [esp+4] = s1, [esp+8] = c1
; Out:   eax = pointer to the first byte equal to c1, or 0 if the
;        terminating 0 is reached first
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
strchar:
        mov     ecx, [esp + 4]          ; ecx = cursor into s1
        mov     dl, [esp + 8]           ; dl  = c1
.scan:
        movzx   eax, byte [ecx]         ; eax = current char, upper bits zero
        test    al, al
        je      .done                   ; terminator: eax is already 0 (NULL)
        cmp     dl, al
        je      .hit
        inc     ecx
        jmp     .scan
.hit:
        mov     eax, ecx                ; return the cursor
.done:
        ret
These are using the 'cdecl' calling convention. For 'stdcall' change the last 'ret' to 'ret 8'.

Wrong Visual Studio assembly output?

I wrote this classic function : (in 32-bit mode)
/* Exchanges its two parameters through the temporary c.
   a and b are passed by value, so only these local copies are exchanged;
   the caller's variables keep their values. */
void ex(size_t a, size_t b)
{
size_t c;
c = a; /* remember the original a */
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of calling the value stored in the stack? I don't understand...
The debugger doesn't show the instruction using ebp to access a; it substitutes the symbol name for the ebp-relative operand. The same syntax is permitted when you write inline assembly, which is also the reason that dword ptr still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".
Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
mov eax, DWORD PTR _a$[ebp]
mov DWORD PTR _c$[ebp], eax
mov eax, DWORD PTR _b$[ebp]
mov DWORD PTR _a$[ebp], eax
mov eax, DWORD PTR _c$[ebp]
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Note this doesn't work, you need to use pointers for the swap to work.
/* Swaps the two size_t objects that pa and pb point to, using c as the temporary. */
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa; /* save the first value */
*pa = *pb;
*pb = c;
}
which gets translated into:
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
mov eax, DWORD PTR _pb$[ebp]
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP

Use the XCHG instruction to reorder the integers to 10-8-6-4-2 x86 assembly

I'm trying to swap element 1 with element 5 and element 2 with element 4, in the dword array using xchg instruction. I'm only asking for guidance to help me get started. I have this so far. I'm trying to use these instructions for the dword array.
mov ax,val1
xchg ax,val2
mov val1,ax
.data
myArray BYTE 2,4,6,8,10
DArray DWORD 5 DUP(?)
.code
main PROC
mov esi, OFFSET myArray
mov edi, OFFSET DArray
mov ecx, 5
L1: movzx eax, byte ptr [esi]
mov [edi], eax
inc esi
add edi, 4
call WriteDec
loop L1
mov al, byte ptr [esi]
xchg al, byte ptr [esi]+4
mov byte ptr[esi], al
call WriteDec
mov ah, byte ptr [esi]+1
xchg ah, byte ptr [esi]+3
mov byte ptr [esi]+1, ah
call WriteDec
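The swap in the byte array never reloads the ESI register: the copy loop has already incremented ESI five times, so it points just past the end of myArray and must be reset before the swap.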
mov esi, OFFSET myArray <<< Add this
mov al, byte ptr [esi]
xchg al, byte ptr [esi]+4
mov byte ptr[esi], al
call WriteDec <<< Is this useful ???
mov ah, byte ptr [esi]+1
xchg ah, byte ptr [esi]+3
mov byte ptr [esi]+1, ah
call WriteDec <<< Is this useful ???
I'm trying to swap element 1 with element 5 and element 2 with element 4, in the dword array using xchg instruction.
mov edi, OFFSET DArray          ; edi -> first dword of DArray
; swap element 1 (offset 0) with element 5 (offset 16)
mov eax, [edi]
xchg eax, [edi+16]
mov [edi], eax
; swap element 2 (offset 4) with element 4 (offset 12)
mov eax, [edi+4]
xchg eax, [edi+12]
mov [edi+4], eax

Understanding Intel x86 assembly output for a modulo calculation

I have a school project about compilers and how it differs in assembly code between Intel x86 and ARMv7, but i'm stuck trying to comprehend the assembly for the Intel x86 architecture.
The source code is:
int main()
{
int a=5,b=2;
int result;
result = a % b;
printf("Result of 5 modulo 2 is %i\n", result);
}
Assembly output (gcc masm=Intel)
main:
/*
Intel32-x86 Architecture
Little endian
ebp register -- base pointer
esp register -- stack pointer
*/
push ebp ; save the caller's ebp on the stack
mov ebp, esp ; copy esp INTO ebp (destination is the first operand)
and esp, -16 ; round esp down to a 16-byte boundary
sub esp, 32 ; reserve 32 bytes for locals and outgoing arguments
mov DWORD PTR [esp+20], 5
;a = 5, stored as a 32-bit value
;bytes in memory order (little endian), in binary: 00000101-00000000-00000000-00000000
mov DWORD PTR [esp+24], 2
;b = 2, stored as a 32-bit value
;bytes in memory order (little endian), in binary: 00000010-00000000-00000000-00000000
mov eax, DWORD PTR [esp+20]
mov edx, eax
sar edx, 31
;Arithmetic shift right of edx by 31: every bit becomes a copy of the sign bit,
;so edx is 0 for a non-negative dividend and -1 for a negative one.
;For 5: edx = 5 before, edx = 0 after.
idiv DWORD PTR [esp+24]
;Signed divide - IDIV r/m32 - uses the 64-bit dividend EDX:EAX
;Divides EDX:EAX by the dword at [esp+24]; the quotient goes to eax and the
;remainder to edx.
;Here EDX:EAX = 0:5, i.e. the 64-bit value 5.
mov DWORD PTR [esp+28], edx ; result = remainder
mov eax, OFFSET FLAT:.LC0 ; address of the label .LC0, passed as printf's first argument
mov edx, DWORD PTR [esp+28]
mov DWORD PTR [esp+4], edx ; second printf argument: result
mov DWORD PTR [esp], eax ; first printf argument
call printf
leave
ret
and esp, -16 ; Logical AND
sub esp, 32 ; Subtraction
What is the purpose of those two instructions?
The purpose is mentioned in the comments:
and esp,-16 ;round esp down to 16 byte boundary
sub esp,32 ;allocate 32 bytes of space for local variables
In case you didn't catch this part about sign extending the dividend:
mov eax, DWORD PTR [esp+20] ; eax = dividend
mov edx, eax ; edx = dividend
sar edx, 31 ; edx = 0 or -1 (the sign extension)

Resources