I'm trying to get the hang of Assembly for class. So for this C code:
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
b += a;
*aPtr = *aPtr + *bPtr; //dereference
printf(“aPtr points to value: %d\n”, *aPtr);
*** Updated
I tried this in Assembly:
.data
var1 DWORD 10
var2 DWORD 20
var3 DWORD ?
.code
main PROC
mov eax, 10
mov ebx, 20
add ebx, eax
mov var3, ebx
mov eax, offset var1
mov ebx, offset var3
mov ecx, [eax]
mov edx, [ebx]
add ecx, edx
mov var3, ecx
INVOKE ExitProcess, 0
main endp
end
But I know that the pointers can't simply be deferenced and added together like that. We also can't use lea, so I'm at a loss on how to add a dereferenced value to another dereferenced value in Assembly; I'm also not sure how I would convert the printf statement correctly. Could I get some help?
Your code is not yet updating the a and b variables with the results from the operations.
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
a SDWORD 10
b SDWORD 20
aPtr DWORD offset a
bPtr DWORD offset b
b += a;
mov eax, a
add b, eax ; Result in b (30)
*aPtr = *aPtr + *bPtr;
mov edi, aPtr
mov esi, bPtr
mov eax, [edi]
add eax, [esi]
mov [edi], eax ; Result in a (40)
printf(“aPtr points to value: %d\n”, *aPtr);
msg db 'aPtr points to value: %d\n', 0
...
mov edi, aPtr
mov eax, [edi]
push eax
push offset msg
call _printf
add esp, 8
Related
Sorry if I am asking a stupid question, but I can't find the answer due to clumsy search terms I guess
If I declare three variables as follows
volatile uint16_t a, b, c;
Will all three variables be declared volatile?
Or should I really not declare multiple variables in a row but instead do:
volatile uint16_t a;
volatile uint16_t b;
volatile uint16_t c;
If I declare three variables as follows
volatile uint16_t a, b, c;
Will all three variables be declared volatile?
Yes, all 3 variables will be volatile.
Or should I really not declare multiple variables in a row but instead do:
That is related to code style and personal preference. Usually declaring variables one per line is preferred, is more readable, easier to read, easier to refactor and results in more readable changes when browsing diff output of files.
We can check the assembly generated by the compiler to see if it optimizes the variables out or not.
When I check this simple program:
#include <stdio.h>
#include <stdint.h>
int main(void)
{
uint16_t a = 1, b = 1, c = 1;
printf("%hu", a);
printf("%hu", b);
printf("%hu", c);
}
The generated assembly at -O3 (link) is:
.LC0:
.string "%hu"
main:
sub rsp, 8
mov esi, 1
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
mov esi, 1
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
mov esi, 1
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
xor eax, eax
add rsp, 8
ret
It's obvious here that the variables have been optimized out and 1 is being used as a parameter instead of the variables.
When I replace the uint16_t a = 1, b = 1, c = 1; with volatile uint16_t a = 1, b = 1, c = 1;, The assembly generated (link) is:
main:
sub rsp, 24
mov edx, 1
mov ecx, 1
mov eax, 1
mov WORD PTR [rsp+10], ax
mov edi, OFFSET FLAT:.LC0
xor eax, eax
mov WORD PTR [rsp+12], dx
mov WORD PTR [rsp+14], cx
movzx esi, WORD PTR [rsp+10]
call printf
movzx esi, WORD PTR [rsp+12]
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
movzx esi, WORD PTR [rsp+14]
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
xor eax, eax
add rsp, 24
ret
Here, volatile is working like it should for all variables. The variables are created and are not optimized out.
In comparison, if we replace volatile uint16_t a = 1, b = 1, c = 1; with volatile uint16_t a = 1; uint16_t b = 1, c = 1; we see that only a is not optimized out (link):
main:
sub rsp, 24
mov eax, 1
mov edi, OFFSET FLAT:.LC0
mov WORD PTR [rsp+14], ax
movzx esi, WORD PTR [rsp+14]
xor eax, eax
call printf
mov esi, 1
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
mov esi, 1
mov edi, OFFSET FLAT:.LC0
xor eax, eax
call printf
xor eax, eax
add rsp, 24
ret
Look at this code. I return an address of the compound literal here.
#include <stdio.h>
#define FOO(bar) ((bar)->a + (bar)->b)
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
int main(void)
{
int baz = FOO((struct bar *) {to_bar(1, 2)});
printf("%d\n", baz);
return 0;
}
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
Output:
3
ISO/IEC 9899 says:
If the compound literal occurs outside the body of a function, the
object has static storage duration; otherwise, it has automatic
storage duration associated with the enclosing block.
I. e., in the to_bar function the unnamed object, created by the compound literal has automatic storage duration. Thereby, it will be destroyed outside scope of to_bar. It seems, this code produces undefined behaviour (based on the standard). Is it so?
You are right. In your example, you immediately retrieved the fields after returning from to_bar, so you didn't have time to corrupt the stack frame of the deceased to_bar function. But here's another example:
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
int main(void)
{
struct bar * corrupted_bar = to_bar(1, 2);
printf("this print will corrupt\n");
int baz = corrupted_bar->a + corrupted_bar->b;
printf("baz = %d\n", baz);
return 0;
}
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
which when executed
this print will corrupt
baz = -59543507
If you look at the assembly
.LC0:
.string "this print will corrupt"
.LC1:
.string "baz = %d\n"
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov esi, 2
mov edi, 1
call to_bar ; call to_bar
mov QWORD PTR [rbp-8], rax ; save address returned to a local pointer
mov edi, OFFSET FLAT:.LC0 ; argument into puts()
call puts ; call puts(), which creates its own local variables that corrupts the bar struct
mov rax, QWORD PTR [rbp-8]
mov edx, DWORD PTR [rax]
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax+4]
add eax, edx
mov DWORD PTR [rbp-12], eax
mov eax, DWORD PTR [rbp-12]
mov esi, eax
mov edi, OFFSET FLAT:.LC1
mov eax, 0
call printf
mov eax, 0
leave
ret
to_bar:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi
mov DWORD PTR [rbp-24], esi
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-8], eax ; field 'a' gets stored, notice dest addr rbp-8 is in the stack frame of this function (local variable)
mov eax, DWORD PTR [rbp-24]
mov DWORD PTR [rbp-4], eax ; field 'b' gets stored, same as above
lea rax, [rbp-8]
pop rbp
ret
I got a task to write an assembly routine that can be read from C and declared as follows:
extern int solve_equation(long int a, long int b,long int c, long int *x, long int *y);
that finds a solution to the equation
a * x + b * y = c
In -2147483648 <x, y <2147483647 by checking all options.
The value returned from the routine will be 1 if a solution is found and another 0.
You must take into consideration that the results of the calculations: a * x, b * y, a * x + b * y can exceed 32 bits.
.MODEL SMALL
.DATA
C DQ ?
SUM DQ 0
MUL1 DQ ?
MUL2 DQ ?
X DD ?
Y DD ?
.CODE
.386
PUBLIC _solve_equation
_solve_equation PROC NEAR
PUSH BP
MOV BP,SP
PUSH SI
MOV X,-2147483648
MOV Y,-2147483648
MOV ECX,4294967295
FOR1:
CMP ECX,0
JE FALSE
PUSH ECX
MOV Y,-2147483648
MOV ECX,4294967295
FOR2:
MOV SUM,0
CMP ECX,0
JE SET_FOR1
MOV EAX,DWORD PTR [BP+4]
IMUL X
MOV DWORD PTR MUL1,EAX
MOV DWORD PTR MUL1+4,EDX
MOV EAX,DWORD PTR [BP+8]
IMUL Y
MOV DWORD PTR MUL2,EAX
MOV DWORD PTR MUL2+4,EDX
MOV EAX, DWORD PTR MUL1
ADD DWORD PTR SUM,EAX
MOV EAX, DWORD PTR MUL2
ADD DWORD PTR SUM,EAX
MOV EAX, DWORD PTR MUL1+4
ADD DWORD PTR SUM+4,EAX
MOV EAX, DWORD PTR MUL2+4
ADD DWORD PTR SUM+4,EAX
CMP SUM,-2147483648
JL SET_FOR2
CMP SUM,2147483647
JG SET_FOR2
MOV EAX,DWORD PTR [BP+12]
CMP DWORD PTR SUM,EAX
JE TRUE
SET_FOR2:
DEC ECX
INC Y
JMP FOR2
SET_FOR1:
POP ECX
DEC ECX
JMP FOR1
FALSE:
MOV AX,0
JMP SOF
TRUE:
MOV SI,WORD PTR [BP+16]
MOV EAX,X
MOV DWORD PTR [SI],EAX
MOV SI,WORD PTR [BP+18]
MOV EAX,Y
MOV DWORD PTR [SI],EAX
MOV AX,1
SOF:
POP SI
POP BP
RET
_solve_equation ENDP
END
Is this the right way to work with large numbers?
I get argument to operation or instruction has illegal size when I try to do:
MOV SUM,0
CMP SUM,-2147483648
CMP SUM,2147483647
main code:
int main()
{
long int x, y, flag;
flag = solve_equation(-5,4,2147483647,&x, &y);
if (flag == 1)
printf("%ld*%ld + %ld*%ld = %ld\n", -5L,x,4L,y,2147483647);
return 0;
}
output
-5*-2147483647 + 4*-2147483647 = 2147483647
I`m using dosbox 0.74 and tcc
You're using 16-bit code, so 64-bit operand-size isn't available. Your assembler magically associates a size with sum, because you defined it with sum dq 0.
So mov sum, 0 is equivalent to mov qword ptr [sum], 0, which of course won't assemble in 16 or 32-bit mode; you can only operate on up to 32 bits at once with integer operations.
(32-bit operand-size is available in 16-bit mode on 386-compatible CPUs, using the same machine encodings that allows 16-bit operand size in 32-bit mode. But 64-bit operand size is only available in 64-bit mode. Unlike 386, AMD64 didn't add any new prefixes or anything to previously-existing modes, for various reasons.)
You could zero the whole 64-bit sum with an SSE store, or even compare with SSE4.2 pcmpgtq, but that's probably not what you want.
It looks like you want to check if 64-bit sum fits in 32 bits. (i.e. if it is a sign-extended 32-bit integer).
So really you just need to check that all 32 high bits are the same and match bit 31 of the low half.
mov eax, dword ptr [sum]
cdq ; sign extend eax into edx:eax
; i.e. copy bit 31 of EAX to all bits of EDX
cmp edx, dword ptr [sum+4]
je small_sum
I wrote this classic function : (in 32-bit mode)
void ex(size_t a, size_t b)
{
size_t c;
c = a;
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of calling the value stored in the stack? I don't understand...
The debugger doesn't show the instruction using ebp to access a. The same syntax is permitted when you write inline assembly. Otherwise the reason that dword ptr still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".
Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
mov eax, DWORD PTR _a$[ebp]
mov DWORD PTR _c$[ebp], eax
mov eax, DWORD PTR _b$[ebp]
mov DWORD PTR _a$[ebp], eax
mov eax, DWORD PTR _c$[ebp]
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Note this doesn't work, you need to use pointers for the swap to work.
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa;
*pa = *pb;
*pb = c;
}
which gets translated into:
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
mov eax, DWORD PTR _pa$[ebp]
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
mov eax, DWORD PTR _pb$[ebp]
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Can someone explain me why the following c code is equivalent to the asm code?
void arith1(){
a=(b*10)+(5/(-e));
}
Why do we put the value of b in a ecx register.
ASM code:
mov ecx, DWORD PTR _b
imul ecx, 10 ; $1 = b * 10
mov esi, DWORD PTR _e
neg esi ; $2 = -e
mov eax, 5
cdq
idiv esi ; $3 = 5 / -e
add ecx, eax ; $4 = $1 + $3
mov DWORD PTR _a, ecx ; a = $4
mov ecx, DWORD PTR _b
imul ecx, 10 ; edx:eax = b * 10
mov esi, DWORD PTR _e
neg esi ; esi = -e
mov eax, 5
cdq ; edx:eax = 5
idiv esi ; eax = 5 / -e
add ecx, eax ; ecx = b * 10 + 5 / -e
mov DWORD PTR _a, ecx ; store result to a
The only non-intuitive part is that the imul and idiv instructions combine the edx and eax registers to form a 64-bit value. The upper 32-bits are discarded after the imul instruction, C doesn't perform overflow checks. The cdq instruction (convert double to quad) is required to turn the 32-bit value into a 64-bit value before dividing.
mov ecx, DWORD PTR _b
Move the variable b into ecx register
imul ecx, 10 ; $1 = b * 10
Multiply the ecx register by 10. (b*10)
mov esi, DWORD PTR _e
Move the variable e into the esi register.
neg esi ; $2 = -e
Negate the esi register. (-e)
mov eax, 5
Move 5 into the eax register.
cdq
Convert eax into a quad-word (I think, not sure what it's needed for).
idiv esi ; $3 = 5 / -e
Divide eax by esi (5 / -e)
add ecx, eax ; $4 = $1 + $3
Add eax to ecx (b*10)+(5/-e).
mov DWORD PTR _a, ecx ; a = $4
Move ecx into variable a (a = (b*10)+(5/-e)).