Understanding GCC inline assembly with a Hello World program - c

A friend helped me come up with the following code to use inline assembly in GCC on a 64-bit Windows machine:
int main() {
char* str = "Hello World";
int ret;
asm volatile(
"call puts"
: "=a" (ret), "+c" (str)
:
: "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11");
return 0;
}
After compiling with -S -masm=intel (I prefer Intel syntax), I get this assembly code:
.file "hello.c"
.intel_syntax noprefix
.def __main; .scl 2; .type 32; .endef
.section .rdata,"dr"
.LC0:
.ascii "Hello World\0"
.text
.globl main
.def main; .scl 2; .type 32; .endef
.seh_proc main
main:
push rbp
.seh_pushreg rbp
push rdi
.seh_pushreg rdi
push rsi
.seh_pushreg rsi
mov rbp, rsp
.seh_setframe rbp, 0
sub rsp, 48
.seh_stackalloc 48
.seh_endprologue
call __main
lea rax, .LC0[rip]
mov QWORD PTR -8[rbp], rax
mov rax, QWORD PTR -8[rbp]
mov rcx, rax
/APP
# 7 "hello.c" 1
call puts
# 0 "" 2
/NO_APP
mov DWORD PTR -12[rbp], eax
mov QWORD PTR -8[rbp], rcx
mov eax, 0
add rsp, 48
pop rsi
pop rdi
pop rbp
ret
.seh_endproc
.ident "GCC: (x86_64-posix-seh-rev1, Built by MinGW-W64 project) 4.9.2"
It works, but it sure looks messy with what appears to be superfluous code. Then again, my last experience with assembly was with the 65816 back in the 80s, and it wasn't inline. Anyway, I cleaned up the code, and the following accomplishes the exact same thing, as far as I can tell:
.intel_syntax noprefix
.data:
.ascii "Hello World\0"
.text
.globl main
main:
sub rsp, 48
lea rax, .data[rip]
mov rcx, rax
call puts
mov eax, 0
add rsp, 48
ret
Much simpler. What's all that extra stuff GCC added?
Edit: Not a duplicate because in addition to the structured exception handling, I'm also asking about the callee-saved registers, the call to __main, the explicit size directives, and the APP/NO_APP section.

Related

Return of syscall with wrong file descriptor is not negative [duplicate]

I'm having trouble finding the good documentation for writing 64-bit assembly on MacOS.
The 64-bit SysV ABI says the following in section A.2.1 and this SO post quotes it:
A system-call is done via the syscall instruction. The kernel destroys
registers %rcx and %r11.
Returning from the syscall, register %rax contains the result of the
system-call. A value in the range between -4095 and -1 indicates an error,
it is -errno.
Those two sentences are ok on Linux but are wrong on macOS Sierra with the following code:
global _start
extern _exit
section .text
_start:
; Align stack to 16 bytes for libc
and rsp, 0xFFFFFFFFFFFFFFF0
; Call write
mov rdx, 12 ; size
mov rsi, hello ; buf
mov edi, 1 ; fd
mov rax, 0x2000004 ; write ; replace to mov rax, 0x1 on linux
syscall
jc .err ; Jumps on error on macOS, but why?
jnc .ok
.err:
mov rdi, -1
call _exit ; exit(-1)
.ok:
; Expect rdx to be 12, but it isn't on macOS!
mov rdi, rdx
call _exit ; exit(rdx)
; String for write
section .data
hello:
.str db `Hello world\n`
.len equ $-hello.str
Compile with NASM:
; MacOS: nasm -f macho64 syscall.asm && ld syscall.o -lc -macosx_version_min 10.12 -e _start -o syscall
; Linux: nasm -f elf64 syscall.asm -o syscall.o && ld syscall.o -lc -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o syscall
Run on macOS:
./syscall # Return value 0
./syscall >&- # Return value 255 (-1)
I found out that:
A syscall return errno an sets the carry flag on error, instead of returning -errno in rax
rdx register is clobbered by syscall
On Linux, everything works as expected
Why is rdx clobbered? Why doesn't a syscall return -errno? Where can I find the real documentation?
The only place I found where someone talks about the carry flag for syscall errors is here
I used this:
# as hello.asm -o hello.o
# ld hello.o -macosx_version_min 10.13 -e _main -o hello -lSystem
.section __DATA,__data
str:
.asciz "Hello world!\n"
.section __TEXT,__text
.globl _main
_main:
movl $0x2000004, %eax # preparing system call 4
movl $1, %edi # STDOUT file descriptor is 1
movq str#GOTPCREL(%rip), %rsi # The value to print
movq $13, %rdx # the size of the value to print
syscall
movl %eax, %edi
movl $0x2000001, %eax # exit (return value of the call to write())
syscall
and was able to catch return value into eax. Here return value is the number of bytes actually written by write system call. And yes MacOS being a BSD variant it is the carry flag that tells you if the syscall was wrong or not (errno is just an external linkage variable).
# hello_asm.s
# as hello_asm.s -o hello_asm.o
# ld hello_asm.o -e _main -o hello_asm
.section __DATA,__data
str:
.asciz "Hello world!\n"
good:
.asciz "OK\n"
.section __TEXT,__text
.globl _main
_main:
movl $0x2000004, %eax # preparing system call 4
movl $5, %edi # STDOUT file descriptor is 5
movq str#GOTPCREL(%rip), %rsi # The value to print
movq $13, %rdx # the size of the value to print
syscall
jc err
movl $0x2000004, %eax # preparing system call 4
movl $1, %edi # STDOUT file descriptor is 1
movq good#GOTPCREL(%rip), %rsi # The value to print
movq $3, %rdx # the size of the value to print
syscall
movl $0, %edi
movl $0x2000001, %eax # exit 0
syscall
err:
movl $1, %edi
movl $0x2000001, %eax # exit 1
syscall
This will exits with error code one because descriptor 5 was used, if you try descriptor 1 then it will work printing another message and exiting with 0.
I don't know why rdx gets clobbered, just to confirm that it indeed does seem to get zeroed across the "write" systemcall. I examined the status of every register:
global _start
section .text
_start:
mov rax, 0xDEADBEEF; 0xDEADBEEF = 3735928559; 3735928559 mod 256 = 239
mov rbx, 0xDEADBEEF
mov rcx, 0xDEADBEEF
mov rdx, 0xDEADBEEF
mov rsi, 0xDEADBEEF
mov rdi, 0xDEADBEEF
mov rsp, 0xDEADBEEF
mov rbp, 0xDEADBEEF
mov r8, 0xDEADBEEF
mov r9, 0xDEADBEEF
mov r10, 0xDEADBEEF
mov r11, 0xDEADBEEF
mov r12, 0xDEADBEEF
mov r13, 0xDEADBEEF
mov r14, 0xDEADBEEF
mov r15, 0xDEADBEEF
mov rdx, len2 ; size
mov rsi, msg2 ; buf
mov rdi, 1 ; fd
mov rax, 0x2000004 ; write
syscall
mov rdi, rsi ; CHANGE THIS TO EXAMINE DIFFERENT REGISTERS
mov rax, 0x2000001 ; exit
syscall
section .data
msg_pad db `aaaa\n` ; to make the buffer not to be page-aligned
msg2 db `bbbbbb\n` ; because then it's easier to notice whether
len2 equ $-msg2 ; clobbered or not
nasm -f macho64 syscall.asm && ld syscall.o -e _start -static && ./a.out; echo "status: $?"
The results I got:
clobber list of a "write" syscall
rax clobbered
rbx not clobbered
rcx clobbered
rdx clobbered <- This is the unexpected case?!
rsi not clobbered
rdi not clobbered
rsp not clobbered
rbp not clobbered
r8 not clobbered
r9 not clobbered
r10 not clobbered
r11 clobbered
r12 not clobbered
r13 not clobbered
r14 not clobbered
r15 not clobbered
It would be interesting to know other syscalls zero rdx too, I didn't have the energy to attempt a thorough investigation. But maybe, just to be safe, one should add rdx to the clobber list of all of the MacOS syscalls from now on.

Non-used Reservated Stack in Intel x86 Assembly

I am in the beginning of learning intel's x86 assembly code and compiled this simple "hello world" c program (without the cfi additions for simplicity):
#include
int main(int argc, char* argv[]) {
printf("hello world!");
return 0;
}
The following x86 code came out:
.file "helloworld.c"
.intel_syntax noprefix
.section .rodata
.LC0:
.string "hello world!"
.text
.globl main
.type main, #function
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov DWORD PTR -4[rbp], edi
mov QWORD PTR -16[rbp], rsi
lea rdi, .LC0[rip]
mov eax, 0
call printf#PLT
mov eax, 0
leave
ret
.size main, .-main
.ident "GCC: (Debian 7.2.0-19) 7.2.0"
.section .note.GNU-stack,"",#progbits
The question: Why are those 16 bytes for local variables reserved on the stack but aren't used in any way? The program even does the same, without those lines, so for which reason were they created?

Does while(1){} do the same as while(1);

I just want to be sure that this C code:
while(flag==true)
{
}
foo();
does the same as this:
while(flag==true);
foo();
; alone is a null statement in C.
In your case, {} or ; are syntactically needed, but they do the same: nothing
Related: Use of null statement in C
In addition to the other answers: It's the same thing.
But I prefer this:
while (condition)
{
}
foo();
over this:
while (condition);
foo();
because if you forget the semicolon after the while, your code will compile fine but it won't do what you expect:
while(condition) // ; forgotten here
foo();
will actually be equivalent of:
while(condition)
{
foo();
}
Yes, having an empty body of the loop is equivaled to just while(<some condition>);
Yes. A ; following a control structure (e.g., while, for, etc.) that can be followed with a block is treated as if it was followed by an empty block.
Yes, because when put semicolon after while loop statement that indicate empty body and when the condition becomes false then it goes to the immediate next statement after that loop. 
Yes, they are same.
You Can Generate The assembly of the code and see for yourself that they produce the same assembly. (Using gcc filename.c -S -masm=intel -o ouputfilename)
#include<stdio.h>
int foo(void);
int main(){
int flag;
scanf("%d" , &flag);
while(flag==1);
foo();
}
int foo(void){
int x = 2;
return x*x;
}
.LC0:
.ascii "%d\0"
.text
.globl main
.def main; .scl 2; .type 32; .endef
.seh_proc main
main:
push rbp
.seh_pushreg rbp
mov rbp, rsp
.seh_setframe rbp, 0
sub rsp, 48
.seh_stackalloc 48
.seh_endprologue
call __main
lea rax, -4[rbp]
mov rdx, rax
lea rcx, .LC0[rip]
call scanf
nop
.L2:
mov eax, DWORD PTR -4[rbp]
cmp eax, 1
je .L2
call foo
mov eax, 0
add rsp, 48
pop rbp
ret
.seh_endproc
.globl foo
.def foo; .scl 2; .type 32; .endef
.seh_proc foo
foo:
push rbp
.seh_pushreg rbp
mov rbp, rsp
.seh_setframe rbp, 0
sub rsp, 16
.seh_stackalloc 16
.seh_endprologue
mov DWORD PTR -4[rbp], 2
mov eax, DWORD PTR -4[rbp]
imul eax, DWORD PTR -4[rbp]
add rsp, 16
pop rbp
ret
.seh_endproc
.ident "GCC: (x86_64-posix-seh-rev1, Built by MinGW-W64 project) 6.3.0"
.def scanf; .scl 2; .type 32; .endef
And When I Changed while(flag == 1); to while(flag==1){} Assembly Code Generated is :
.LC0:
.ascii "%d\0"
.text
.globl main
.def main; .scl 2; .type 32; .endef
.seh_proc main
main:
push rbp
.seh_pushreg rbp
mov rbp, rsp
.seh_setframe rbp, 0
sub rsp, 48
.seh_stackalloc 48
.seh_endprologue
call __main
lea rax, -4[rbp]
mov rdx, rax
lea rcx, .LC0[rip]
call scanf
nop
.L2:
mov eax, DWORD PTR -4[rbp]
cmp eax, 1
je .L2
call foo
mov eax, 0
add rsp, 48
pop rbp
ret
.seh_endproc
.globl foo
.def foo; .scl 2; .type 32; .endef
.seh_proc foo
foo:
push rbp
.seh_pushreg rbp
mov rbp, rsp
.seh_setframe rbp, 0
sub rsp, 16
.seh_stackalloc 16
.seh_endprologue
mov DWORD PTR -4[rbp], 2
mov eax, DWORD PTR -4[rbp]
imul eax, DWORD PTR -4[rbp]
add rsp, 16
pop rbp
ret
.seh_endproc
.ident "GCC: (x86_64-posix-seh-rev1, Built by MinGW-W64 project) 6.3.0"
.def scanf; .scl 2; .type 32; .endef
You can see that the relevant portion is same in both cases.
//Below Portion is same in both cases.
.L2:
mov eax, DWORD PTR -4[rbp]
cmp eax, 1
je .L2
call foo
mov eax, 0
add rsp, 48
pop rbp
ret
.seh_endproc
.globl foo
.def foo; .scl 2; .type 32; .endef
.seh_proc foo

Understanding these assembly instructions [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
I'm learning assembly by writing C programs and viewing the assembly output. I've included the C program at the bottom for the page to make it easier. I'm struggling to understand one line of assembly:
cdqe
movzx eax, BYTE PTR [rbp-32+rax] <--- what is this doing?
movsx eax, al
So I think cdqe extends eax into rax (64 bits). Its clear that the string I want to print fits into the al register but I don't understand what is happening deep down with rbp-32+rax. Can someone explain for me?
.file "string_manip.c"
.intel_syntax noprefix
.section .rodata
.LC0:
.string "Hello"
.string ""
.zero 3
.text
.globl main
.type main, #function
main:
.LFB0:
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
mov rbp, rsp
.cfi_def_cfa_register 6
sub rsp, 48
mov rax, QWORD PTR fs:40
mov QWORD PTR [rbp-8], rax
xor eax, eax
mov DWORD PTR [rbp-36], 0
mov eax, DWORD PTR .LC0[rip]
mov DWORD PTR [rbp-32], eax
movzx eax, WORD PTR .LC0[rip+4]
mov WORD PTR [rbp-28], ax
movzx eax, BYTE PTR .LC0[rip+6]
mov BYTE PTR [rbp-26], al
mov WORD PTR [rbp-25], 0
mov BYTE PTR [rbp-23], 0
mov DWORD PTR [rbp-36], 0
jmp .L2
.L3:
mov eax, DWORD PTR [rbp-36]
cdqe
movzx eax, BYTE PTR [rbp-32+rax] <--- what is this doing?
movsx eax, al
mov edi, eax
call putchar
add DWORD PTR [rbp-36], 1
.L2:
cmp DWORD PTR [rbp-36], 5
jle .L3
mov edi, 10
call putchar
mov eax, 0
mov rdx, QWORD PTR [rbp-8]
xor rdx, QWORD PTR fs:40
je .L5
call __stack_chk_fail
.L5:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.4-2ubuntu1~14.04) 4.8.4"
.section .note.GNU-stack,"",#progbits
#include <string.h>
#include <stdio.h>
int main()
{
int i = 0;
char array[10] = "Hello\0";
for(i=0; i<6; i++)
printf("%c", array[i]);
printf("\n");
return 0;
}
It's just calculating the address of one of the characters.
Presumably your string starts at rbp-32 and then the instruction does the C equivalent of ch = string[rax].
I guess this is unoptimized code, so the compiler does a few extra sign extend and zero extend that are not really needed.

How to use lea instruction in a subroutine using GAS

I'm trying to convert a NASM code to GAS. I can't make the lea instruction work.
Here's my original code and this completely works:
section .bss
arr resb 10
section .text
global _start:
_start:
push arr
call getInput
...
getInput:
mov esi, 0
mov ebp, [esp+4]
loop:
...
mov eax, 3
mov ebx, 0
lea ecx, [ebp+esi]
mov edx, 2
int 80h
...
And here's the GAS counterpart I'm trying to write:
.data
arr: .space 10
.text
.globl _start
_start:
push arr
call getInput
...
getInput:
movl $0, %esi
movl 4(%esp,1), %ebp
loop:
...
movl $3, %eax
movl $0, %ebx
leal (%ebp,%esi), %ecx
movl $2, %edx
int $0x80
I've been searching for hours on how to properly do it but I can't find a tutorial on this matter. It produces a segmentation fault when I run it. Please help me.
P.S. I use these commands to compile and link (thanks to someone here who answered my previous question):
as --32 -o sample.o sample.s
ld -m elf_i386 -o sample sample.o

Resources