I am trying to convert an assembly program into null-free shellcode.
However, I am unsure how to go about this for certain instructions. Some of them are far more complex than the examples I found on the web.
I used a C program from the web as an example, then converted it to a .s file using gcc -Wall -O -fverbose-asm -S example.c
gcc -c example.s -o example.o
gcc example.o -o example
objdump -d example
.file "test.c"
# GNU C (Debian 4.9.2-10) version 4.9.2 (x86_64-linux-gnu)
# compiled by GNU C version 4.9.2, GMP version 6.0.0, MPFR version 3.1.2-p3, MPC version 1.0.2
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -imultiarch x86_64-linux-gnu test.c -mtune=generic
# -march=x86-64 -O -Wall -fverbose-asm
# options enabled: -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg
# -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
# -fdefer-pop -fdelete-null-pointer-checks -fdwarf2-cfi-asm
# -fearly-inlining -feliminate-unused-debug-types -fforward-propagate
# -ffunction-cse -fgcse-lm -fgnu-runtime -fgnu-unique
# -fguess-branch-probability -fident -fif-conversion -fif-conversion2
# -finline -finline-atomics -finline-functions-called-once -fipa-profile
# -fipa-pure-const -fipa-reference -fira-hoist-pressure
# -fira-share-save-slots -fira-share-spill-slots -fivopts
# -fkeep-static-consts -fleading-underscore -fmath-errno -fmerge-constants
# -fmerge-debug-strings -fmove-loop-invariants -fomit-frame-pointer
# -fpeephole -fprefetch-loop-arrays -freg-struct-return
# -fsched-critical-path-heuristic -fsched-dep-count-heuristic
# -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic
# -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic
# -fsched-stalled-insns-dep -fshow-column -fshrink-wrap -fsigned-zeros
# -fsplit-ivs-in-unroller -fsplit-wide-types -fstrict-volatile-bitfields
# -fsync-libcalls -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp
# -ftree-ccp -ftree-ch -ftree-coalesce-vars -ftree-copy-prop
# -ftree-copyrename -ftree-cselim -ftree-dce -ftree-dominator-opts
# -ftree-dse -ftree-forwprop -ftree-fre -ftree-loop-if-convert
# -ftree-loop-im -ftree-loop-ivcanon -ftree-loop-optimize
# -ftree-parallelize-loops= -ftree-phiprop -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-sink -ftree-slsr -ftree-sra -ftree-ter
# -funit-at-a-time -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -malign-stringops
# -mavx256-split-unaligned-load -mavx256-split-unaligned-store
# -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp
# -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2
# -mtls-direct-seg-refs
# String constants used by main.
# Section flags "aMS" = allocatable, mergeable, NUL-terminated strings; the
# trailing number is the entity size in bytes. The section type must be
# spelled @progbits: on x86 GAS a '#' starts a comment, so "#progbits,1"
# would silently drop both the type and the entity size.
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "Could not create socket"
.LC1:
    .string "127.0.0.1"
.LC2:
    .string "connect error"
.LC3:
    .string "Connected\n"
# The message passed to send() lives in its own 8-byte-aligned string section.
.section .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC4:
    .string "Hola Redon , I really appreaciate your help\n\r\n"
# Back to the byte-aligned string section for the remaining messages.
.section .rodata.str1.1
.LC5:
    .string "Send failed"
.LC6:
    .string "Data Send\n"
.LC7:
    .string "recv failed"
.LC8:
    .string "Reply received\n"
.text
.globl main
.type main, @function               # '@', not '#': '#' starts a comment on x86 GAS
#-----------------------------------------------------------------------
# main -- GCC 4.9.2 -O output for a small TCP client
# Syntax: AT&T / GAS.  ABI: System V AMD64 (x86_64-linux-gnu).
# Flow:   socket -> inet_addr -> connect -> send -> recv -> puts(reply)
# Out:    eax = 0 on the normal path, 1 if connect or send fails
# Regs:   ebx = socket_desc (callee-saved, so it survives every call)
# Stack:  2016 bytes of locals:
#           0(%rsp)    .. 2000-byte buffer handed to recv
#           2000(%rsp) .. 16-byte `server` address structure
#         return address (8) + pushed rbx (8) + 2016 = 2032, a multiple
#         of 16, so rsp is 16-byte aligned at every call below.
#-----------------------------------------------------------------------
main:
.LFB29:
    .cfi_startproc
    pushq   %rbx                    # save callee-saved rbx; it will hold socket_desc
    .cfi_def_cfa_offset 16
    .cfi_offset 3, -16
    subq    $2016, %rsp             # reserve the receive buffer and `server`
    .cfi_def_cfa_offset 2032
    movl    $0, %edx                # arg3 = 0
    movl    $1, %esi                # arg2 = 1 (SOCK_STREAM on Linux)
    movl    $2, %edi                # arg1 = 2 (AF_INET on Linux)
    call    socket                  # socket(2, 1, 0)
    movl    %eax, %ebx              # socket_desc = return value
    cmpl    $-1, %eax
    jne     .L2                     # socket_desc != -1: skip the error message
    movl    $.LC0, %edi             # "Could not create socket"
    movl    $0, %eax                # variadic call: al = 0 vector registers used
    call    printf                  # NOTE: falls through to .L2 even after this failure
.L2:
    movl    $.LC1, %edi             # "127.0.0.1"
    call    inet_addr
    movl    %eax, 2004(%rsp)        # server.sin_addr.s_addr = inet_addr result
    movw    $2, 2000(%rsp)          # server.sin_family = 2
    movw    $14093, 2002(%rsp)      # server.sin_port = 0x370d; bytes 0d 37 in memory,
                                    # i.e. port 3383 in network byte order
    movl    $16, %edx               # arg3 = 16, the size passed for `server`
    leaq    2000(%rsp), %rsi        # arg2 = &server
    movl    %ebx, %edi              # arg1 = socket_desc
    call    connect
    testl   %eax, %eax
    jns     .L3                     # non-negative result: connected
    movl    $.LC2, %edi             # "connect error"
    call    puts
    movl    $1, %eax                # return value 1
    jmp     .L4
.L3:
    movl    $.LC3, %edi             # "Connected\n"
    call    puts
    movl    $0, %ecx                # arg4 = 0 (flags)
    movl    $46, %edx               # arg3 = 46, the length of .LC4 without its NUL
    movl    $.LC4, %esi             # arg2 = message text
    movl    %ebx, %edi              # arg1 = socket_desc
    call    send
    testq   %rax, %rax              # send's result is tested as a 64-bit value
    jns     .L5                     # non-negative result: data was sent
    movl    $.LC5, %edi             # "Send failed"
    call    puts
    movl    $1, %eax                # return value 1
    jmp     .L4
.L5:
    movl    $.LC6, %edi             # "Data Send\n"
    call    puts
    movl    $0, %ecx                # arg4 = 0 (flags)
    movl    $2000, %edx             # arg3 = 2000, the buffer size
    movq    %rsp, %rsi              # arg2 = receive buffer at the bottom of the frame
    movl    %ebx, %edi              # arg1 = socket_desc
    call    recv
    testq   %rax, %rax
    jns     .L6                     # non-negative result: skip the error message
    movl    $.LC7, %edi             # "recv failed"
    call    puts                    # NOTE: falls through to .L6 even after this failure
.L6:
    movl    $.LC8, %edi             # "Reply received\n"
    call    puts
    movq    %rsp, %rdi              # print the receive buffer as a C string
    call    puts                    # NOTE(review): nothing here NUL-terminates the buffer
    movl    $0, %eax                # return value 0
.L4:
    addq    $2016, %rsp             # release the locals
    .cfi_def_cfa_offset 16
    popq    %rbx                    # restore the caller's rbx
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE29:
    .size main, .-main
    .ident "GCC: (Debian 4.9.2-10) 4.9.2"
    .section .note.GNU-stack,"",@progbits
Which gives this:
test: file format elf64-x86-64
Disassembly of section .init:
0000000000400500 <_init>:
400500: 48 83 ec 08 sub $0x8,%rsp
400504: 48 8b 05 fd 06 20 00 mov 0x2006fd(%rip),%rax # 600c08 <_DYNAMIC+0x1d0>
40050b: 48 85 c0 test %rax,%rax
40050e: 74 05 je 400515 <_init+0x15>
400510: e8 7b 00 00 00 callq 400590 <__gmon_start__#plt>
400515: 48 83 c4 08 add $0x8,%rsp
400519: c3 retq
Disassembly of section .plt:
0000000000400520 <recv#plt-0x10>:
400520: ff 35 f2 06 20 00 pushq 0x2006f2(%rip) # 600c18 <_GLOBAL_OFFSET_TABLE_+0x8>
400526: ff 25 f4 06 20 00 jmpq *0x2006f4(%rip) # 600c20 <_GLOBAL_OFFSET_TABLE_+0x10>
40052c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000400530 <recv#plt>:
400530: ff 25 f2 06 20 00 jmpq *0x2006f2(%rip) # 600c28 <_GLOBAL_OFFSET_TABLE_+0x18>
400536: 68 00 00 00 00 pushq $0x0
40053b: e9 e0 ff ff ff jmpq 400520 <_init+0x20>
0000000000400540 <puts#plt>:
400540: ff 25 ea 06 20 00 jmpq *0x2006ea(%rip) # 600c30 <_GLOBAL_OFFSET_TABLE_+0x20>
400546: 68 01 00 00 00 pushq $0x1
40054b: e9 d0 ff ff ff jmpq 400520 <_init+0x20>
0000000000400550 <send#plt>:
400550: ff 25 e2 06 20 00 jmpq *0x2006e2(%rip) # 600c38 <_GLOBAL_OFFSET_TABLE_+0x28>
400556: 68 02 00 00 00 pushq $0x2
40055b: e9 c0 ff ff ff jmpq 400520 <_init+0x20>
0000000000400560 <printf#plt>:
400560: ff 25 da 06 20 00 jmpq *0x2006da(%rip) # 600c40 <_GLOBAL_OFFSET_TABLE_+0x30>
400566: 68 03 00 00 00 pushq $0x3
40056b: e9 b0 ff ff ff jmpq 400520 <_init+0x20>
0000000000400570 <__libc_start_main#plt>:
400570: ff 25 d2 06 20 00 jmpq *0x2006d2(%rip) # 600c48 <_GLOBAL_OFFSET_TABLE_+0x38>
400576: 68 04 00 00 00 pushq $0x4
40057b: e9 a0 ff ff ff jmpq 400520 <_init+0x20>
0000000000400580 <inet_addr#plt>:
400580: ff 25 ca 06 20 00 jmpq *0x2006ca(%rip) # 600c50 <_GLOBAL_OFFSET_TABLE_+0x40>
400586: 68 05 00 00 00 pushq $0x5
40058b: e9 90 ff ff ff jmpq 400520 <_init+0x20>
0000000000400590 <__gmon_start__#plt>:
400590: ff 25 c2 06 20 00 jmpq *0x2006c2(%rip) # 600c58 <_GLOBAL_OFFSET_TABLE_+0x48>
400596: 68 06 00 00 00 pushq $0x6
40059b: e9 80 ff ff ff jmpq 400520 <_init+0x20>
00000000004005a0 <connect#plt>:
4005a0: ff 25 ba 06 20 00 jmpq *0x2006ba(%rip) # 600c60 <_GLOBAL_OFFSET_TABLE_+0x50>
4005a6: 68 07 00 00 00 pushq $0x7
4005ab: e9 70 ff ff ff jmpq 400520 <_init+0x20>
00000000004005b0 <socket#plt>:
4005b0: ff 25 b2 06 20 00 jmpq *0x2006b2(%rip) # 600c68 <_GLOBAL_OFFSET_TABLE_+0x58>
4005b6: 68 08 00 00 00 pushq $0x8
4005bb: e9 60 ff ff ff jmpq 400520 <_init+0x20>
Disassembly of section .text:
00000000004005c0 <_start>:
4005c0: 31 ed xor %ebp,%ebp
4005c2: 49 89 d1 mov %rdx,%r9
4005c5: 5e pop %rsi
4005c6: 48 89 e2 mov %rsp,%rdx
4005c9: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
4005cd: 50 push %rax
4005ce: 54 push %rsp
4005cf: 49 c7 c0 30 08 40 00 mov $0x400830,%r8
4005d6: 48 c7 c1 c0 07 40 00 mov $0x4007c0,%rcx
4005dd: 48 c7 c7 b6 06 40 00 mov $0x4006b6,%rdi
4005e4: e8 87 ff ff ff callq 400570 <__libc_start_main#plt>
4005e9: f4 hlt
4005ea: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
00000000004005f0 <deregister_tm_clones>:
4005f0: b8 87 0c 60 00 mov $0x600c87,%eax
4005f5: 55 push %rbp
4005f6: 48 2d 80 0c 60 00 sub $0x600c80,%rax
4005fc: 48 83 f8 0e cmp $0xe,%rax
400600: 48 89 e5 mov %rsp,%rbp
400603: 76 1b jbe 400620 <deregister_tm_clones+0x30>
400605: b8 00 00 00 00 mov $0x0,%eax
40060a: 48 85 c0 test %rax,%rax
40060d: 74 11 je 400620 <deregister_tm_clones+0x30>
40060f: 5d pop %rbp
400610: bf 80 0c 60 00 mov $0x600c80,%edi
400615: ff e0 jmpq *%rax
400617: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40061e: 00 00
400620: 5d pop %rbp
400621: c3 retq
400622: 66 66 66 66 66 2e 0f data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
400629: 1f 84 00 00 00 00 00
0000000000400630 <register_tm_clones>:
400630: be 80 0c 60 00 mov $0x600c80,%esi
400635: 55 push %rbp
400636: 48 81 ee 80 0c 60 00 sub $0x600c80,%rsi
40063d: 48 c1 fe 03 sar $0x3,%rsi
400641: 48 89 e5 mov %rsp,%rbp
400644: 48 89 f0 mov %rsi,%rax
400647: 48 c1 e8 3f shr $0x3f,%rax
40064b: 48 01 c6 add %rax,%rsi
40064e: 48 d1 fe sar %rsi
400651: 74 15 je 400668 <register_tm_clones+0x38>
400653: b8 00 00 00 00 mov $0x0,%eax
400658: 48 85 c0 test %rax,%rax
40065b: 74 0b je 400668 <register_tm_clones+0x38>
40065d: 5d pop %rbp
40065e: bf 80 0c 60 00 mov $0x600c80,%edi
400663: ff e0 jmpq *%rax
400665: 0f 1f 00 nopl (%rax)
400668: 5d pop %rbp
400669: c3 retq
40066a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400670 <__do_global_dtors_aux>:
400670: 80 3d 09 06 20 00 00 cmpb $0x0,0x200609(%rip) # 600c80 <__TMC_END__>
400677: 75 11 jne 40068a <__do_global_dtors_aux+0x1a>
400679: 55 push %rbp
40067a: 48 89 e5 mov %rsp,%rbp
40067d: e8 6e ff ff ff callq 4005f0 <deregister_tm_clones>
400682: 5d pop %rbp
400683: c6 05 f6 05 20 00 01 movb $0x1,0x2005f6(%rip) # 600c80 <__TMC_END__>
40068a: f3 c3 repz retq
40068c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000400690 <frame_dummy>:
400690: bf 30 0a 60 00 mov $0x600a30,%edi
400695: 48 83 3f 00 cmpq $0x0,(%rdi)
400699: 75 05 jne 4006a0 <frame_dummy+0x10>
40069b: eb 93 jmp 400630 <register_tm_clones>
40069d: 0f 1f 00 nopl (%rax)
4006a0: b8 00 00 00 00 mov $0x0,%eax
4006a5: 48 85 c0 test %rax,%rax
4006a8: 74 f1 je 40069b <frame_dummy+0xb>
4006aa: 55 push %rbp
4006ab: 48 89 e5 mov %rsp,%rbp
4006ae: ff d0 callq *%rax
4006b0: 5d pop %rbp
4006b1: e9 7a ff ff ff jmpq 400630 <register_tm_clones>
00000000004006b6 <main>:
4006b6: 53 push %rbx
4006b7: 48 81 ec e0 07 00 00 sub $0x7e0,%rsp
4006be: ba 00 00 00 00 mov $0x0,%edx
4006c3: be 01 00 00 00 mov $0x1,%esi
4006c8: bf 02 00 00 00 mov $0x2,%edi
4006cd: e8 de fe ff ff callq 4005b0 <socket#plt>
4006d2: 89 c3 mov %eax,%ebx
4006d4: 83 f8 ff cmp $0xffffffff,%eax
4006d7: 75 0f jne 4006e8 <main+0x32>
4006d9: bf 44 08 40 00 mov $0x400844,%edi
4006de: b8 00 00 00 00 mov $0x0,%eax
4006e3: e8 78 fe ff ff callq 400560 <printf#plt>
4006e8: bf 5c 08 40 00 mov $0x40085c,%edi
4006ed: e8 8e fe ff ff callq 400580 <inet_addr#plt>
4006f2: 89 84 24 d4 07 00 00 mov %eax,0x7d4(%rsp)
4006f9: 66 c7 84 24 d0 07 00 movw $0x2,0x7d0(%rsp)
400700: 00 02 00
400703: 66 c7 84 24 d2 07 00 movw $0x370d,0x7d2(%rsp)
40070a: 00 0d 37
40070d: ba 10 00 00 00 mov $0x10,%edx
400712: 48 8d b4 24 d0 07 00 lea 0x7d0(%rsp),%rsi
400719: 00
40071a: 89 df mov %ebx,%edi
40071c: e8 7f fe ff ff callq 4005a0 <connect#plt>
400721: 85 c0 test %eax,%eax
400723: 79 11 jns 400736 <main+0x80>
400725: bf 66 08 40 00 mov $0x400866,%edi
40072a: e8 11 fe ff ff callq 400540 <puts#plt>
40072f: b8 01 00 00 00 mov $0x1,%eax
400734: eb 7a jmp 4007b0 <main+0xfa>
400736: bf 74 08 40 00 mov $0x400874,%edi
40073b: e8 00 fe ff ff callq 400540 <puts#plt>
400740: b9 00 00 00 00 mov $0x0,%ecx
400745: ba 2e 00 00 00 mov $0x2e,%edx
40074a: be b8 08 40 00 mov $0x4008b8,%esi
40074f: 89 df mov %ebx,%edi
400751: e8 fa fd ff ff callq 400550 <send#plt>
400756: 48 85 c0 test %rax,%rax
400759: 79 11 jns 40076c <main+0xb6>
40075b: bf 7f 08 40 00 mov $0x40087f,%edi
400760: e8 db fd ff ff callq 400540 <puts#plt>
400765: b8 01 00 00 00 mov $0x1,%eax
40076a: eb 44 jmp 4007b0 <main+0xfa>
40076c: bf 8b 08 40 00 mov $0x40088b,%edi
400771: e8 ca fd ff ff callq 400540 <puts#plt>
400776: b9 00 00 00 00 mov $0x0,%ecx
40077b: ba d0 07 00 00 mov $0x7d0,%edx
400780: 48 89 e6 mov %rsp,%rsi
400783: 89 df mov %ebx,%edi
400785: e8 a6 fd ff ff callq 400530 <recv#plt>
40078a: 48 85 c0 test %rax,%rax
40078d: 79 0a jns 400799 <main+0xe3>
40078f: bf 96 08 40 00 mov $0x400896,%edi
400794: e8 a7 fd ff ff callq 400540 <puts#plt>
400799: bf a2 08 40 00 mov $0x4008a2,%edi
40079e: e8 9d fd ff ff callq 400540 <puts#plt>
4007a3: 48 89 e7 mov %rsp,%rdi
4007a6: e8 95 fd ff ff callq 400540 <puts#plt>
4007ab: b8 00 00 00 00 mov $0x0,%eax
4007b0: 48 81 c4 e0 07 00 00 add $0x7e0,%rsp
4007b7: 5b pop %rbx
4007b8: c3 retq
4007b9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
00000000004007c0 <__libc_csu_init>:
4007c0: 41 57 push %r15
4007c2: 41 89 ff mov %edi,%r15d
4007c5: 41 56 push %r14
4007c7: 49 89 f6 mov %rsi,%r14
4007ca: 41 55 push %r13
4007cc: 49 89 d5 mov %rdx,%r13
4007cf: 41 54 push %r12
4007d1: 4c 8d 25 48 02 20 00 lea 0x200248(%rip),%r12 # 600a20 <__frame_dummy_init_array_entry>
4007d8: 55 push %rbp
4007d9: 48 8d 2d 48 02 20 00 lea 0x200248(%rip),%rbp # 600a28 <__init_array_end>
4007e0: 53 push %rbx
4007e1: 4c 29 e5 sub %r12,%rbp
4007e4: 31 db xor %ebx,%ebx
4007e6: 48 c1 fd 03 sar $0x3,%rbp
4007ea: 48 83 ec 08 sub $0x8,%rsp
4007ee: e8 0d fd ff ff callq 400500 <_init>
4007f3: 48 85 ed test %rbp,%rbp
4007f6: 74 1e je 400816 <__libc_csu_init+0x56>
4007f8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
4007ff: 00
400800: 4c 89 ea mov %r13,%rdx
400803: 4c 89 f6 mov %r14,%rsi
400806: 44 89 ff mov %r15d,%edi
400809: 41 ff 14 dc callq *(%r12,%rbx,8)
40080d: 48 83 c3 01 add $0x1,%rbx
400811: 48 39 eb cmp %rbp,%rbx
400814: 75 ea jne 400800 <__libc_csu_init+0x40>
400816: 48 83 c4 08 add $0x8,%rsp
40081a: 5b pop %rbx
40081b: 5d pop %rbp
40081c: 41 5c pop %r12
40081e: 41 5d pop %r13
400820: 41 5e pop %r14
400822: 41 5f pop %r15
400824: c3 retq
400825: 66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1)
40082c: 00 00 00 00
0000000000400830 <__libc_csu_fini>:
400830: f3 c3 repz retq
Disassembly of section .fini:
0000000000400834 <_fini>:
400834: 48 83 ec 08 sub $0x8,%rsp
400838: 48 83 c4 08 add $0x8,%rsp
40083c: c3 retq
As you can see, it's a lot. My first question is: will the null bytes in my .init and .plt sections affect my shellcode, or only the ones in my .text section?
I know, for example, that in a case where I have:
ba 00 00 00 00 mov $0x0,%edx I can apply xor %edx,%edx
b8 01 00 00 00 mov $0x1,%eax — can I apply xor %eax,%eax as well?
But what about cases like:
ff 35 f2 06 20 00 pushq 0x2006f2(%rip) or
66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
Since I'm a newbie who wants to learn, any help would be appreciated. Yes, I have read Smashing The Stack For Fun And Profit, but English is not my mother tongue and, like I said, I'm a newbie, so not everything is clear to me.
Thanks in advance.
Related
I decided to compile a very basic C program and take a look at the generated code with objdump -d.
/* Minimal test program: its only statement calls exit(0).
 * argc and argv are accepted but never used.
 * NOTE(review): the snippet as shown has no #include that declares exit(). */
int main(int argc, char *argv[]) {
exit(0);
}
After compiling it with gcc test.c -s -o test.o and then disassembling with objdump -d my text segment looked like this:
Disassembly of section .text:
0000000000001050 <.text>:
1050: 31 ed xor %ebp,%ebp
1052: 49 89 d1 mov %rdx,%r9
1055: 5e pop %rsi
1056: 48 89 e2 mov %rsp,%rdx
1059: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
105d: 50 push %rax
105e: 54 push %rsp
105f: 4c 8d 05 4a 01 00 00 lea 0x14a(%rip),%r8 # 11b0 <__cxa_finalize#plt+0x170>
1066: 48 8d 0d e3 00 00 00 lea 0xe3(%rip),%rcx # 1150 <__cxa_finalize#plt+0x110>
106d: 48 8d 3d c1 00 00 00 lea 0xc1(%rip),%rdi # 1135 <__cxa_finalize#plt+0xf5>
1074: ff 15 66 2f 00 00 callq *0x2f66(%rip) # 3fe0 <__cxa_finalize#plt+0x2fa0>
107a: f4 hlt
107b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
1080: 48 8d 3d a9 2f 00 00 lea 0x2fa9(%rip),%rdi # 4030 <__cxa_finalize#plt+0x2ff0>
1087: 48 8d 05 a2 2f 00 00 lea 0x2fa2(%rip),%rax # 4030 <__cxa_finalize#plt+0x2ff0>
108e: 48 39 f8 cmp %rdi,%rax
1091: 74 15 je 10a8 <__cxa_finalize#plt+0x68>
1093: 48 8b 05 3e 2f 00 00 mov 0x2f3e(%rip),%rax # 3fd8 <__cxa_finalize#plt+0x2f98>
109a: 48 85 c0 test %rax,%rax
109d: 74 09 je 10a8 <__cxa_finalize#plt+0x68>
109f: ff e0 jmpq *%rax
10a1: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10a8: c3 retq
10a9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10b0: 48 8d 3d 79 2f 00 00 lea 0x2f79(%rip),%rdi # 4030 <__cxa_finalize#plt+0x2ff0>
10b7: 48 8d 35 72 2f 00 00 lea 0x2f72(%rip),%rsi # 4030 <__cxa_finalize#plt+0x2ff0>
10be: 48 29 fe sub %rdi,%rsi
10c1: 48 c1 fe 03 sar $0x3,%rsi
10c5: 48 89 f0 mov %rsi,%rax
10c8: 48 c1 e8 3f shr $0x3f,%rax
10cc: 48 01 c6 add %rax,%rsi
10cf: 48 d1 fe sar %rsi
10d2: 74 14 je 10e8 <__cxa_finalize#plt+0xa8>
10d4: 48 8b 05 15 2f 00 00 mov 0x2f15(%rip),%rax # 3ff0 <__cxa_finalize#plt+0x2fb0>
10db: 48 85 c0 test %rax,%rax
10de: 74 08 je 10e8 <__cxa_finalize#plt+0xa8>
10e0: ff e0 jmpq *%rax
10e2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
10e8: c3 retq
10e9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10f0: 80 3d 39 2f 00 00 00 cmpb $0x0,0x2f39(%rip) # 4030 <__cxa_finalize#plt+0x2ff0>
10f7: 75 2f jne 1128 <__cxa_finalize#plt+0xe8>
10f9: 55 push %rbp
10fa: 48 83 3d f6 2e 00 00 cmpq $0x0,0x2ef6(%rip) # 3ff8 <__cxa_finalize#plt+0x2fb8>
1101: 00
1102: 48 89 e5 mov %rsp,%rbp
1105: 74 0c je 1113 <__cxa_finalize#plt+0xd3>
1107: 48 8b 3d 1a 2f 00 00 mov 0x2f1a(%rip),%rdi # 4028 <__cxa_finalize#plt+0x2fe8>
110e: e8 2d ff ff ff callq 1040 <__cxa_finalize#plt>
1113: e8 68 ff ff ff callq 1080 <__cxa_finalize#plt+0x40>
1118: c6 05 11 2f 00 00 01 movb $0x1,0x2f11(%rip) # 4030 <__cxa_finalize#plt+0x2ff0>
111f: 5d pop %rbp
1120: c3 retq
1121: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
1128: c3 retq
1129: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
1130: e9 7b ff ff ff jmpq 10b0 <__cxa_finalize#plt+0x70>
1135: 55 push %rbp
1136: 48 89 e5 mov %rsp,%rbp
1139: 48 83 ec 10 sub $0x10,%rsp
113d: 89 7d fc mov %edi,-0x4(%rbp)
1140: 48 89 75 f0 mov %rsi,-0x10(%rbp)
1144: bf 00 00 00 00 mov $0x0,%edi
1149: e8 e2 fe ff ff callq 1030 <exit#plt>
114e: 66 90 xchg %ax,%ax
1150: 41 57 push %r15
1152: 4c 8d 3d 8f 2c 00 00 lea 0x2c8f(%rip),%r15 # 3de8 <__cxa_finalize#plt+0x2da8>
1159: 41 56 push %r14
115b: 49 89 d6 mov %rdx,%r14
115e: 41 55 push %r13
1160: 49 89 f5 mov %rsi,%r13
1163: 41 54 push %r12
1165: 41 89 fc mov %edi,%r12d
1168: 55 push %rbp
1169: 48 8d 2d 80 2c 00 00 lea 0x2c80(%rip),%rbp # 3df0 <__cxa_finalize#plt+0x2db0>
1170: 53 push %rbx
1171: 4c 29 fd sub %r15,%rbp
1174: 48 83 ec 08 sub $0x8,%rsp
1178: e8 83 fe ff ff callq 1000 <exit#plt-0x30>
117d: 48 c1 fd 03 sar $0x3,%rbp
1181: 74 1b je 119e <__cxa_finalize#plt+0x15e>
1183: 31 db xor %ebx,%ebx
1185: 0f 1f 00 nopl (%rax)
1188: 4c 89 f2 mov %r14,%rdx
118b: 4c 89 ee mov %r13,%rsi
118e: 44 89 e7 mov %r12d,%edi
1191: 41 ff 14 df callq *(%r15,%rbx,8)
1195: 48 83 c3 01 add $0x1,%rbx
1199: 48 39 dd cmp %rbx,%rbp
119c: 75 ea jne 1188 <__cxa_finalize#plt+0x148>
119e: 48 83 c4 08 add $0x8,%rsp
11a2: 5b pop %rbx
11a3: 5d pop %rbp
11a4: 41 5c pop %r12
11a6: 41 5d pop %r13
11a8: 41 5e pop %r14
11aa: 41 5f pop %r15
11ac: c3 retq
11ad: 0f 1f 00 nopl (%rax)
11b0: c3 retq
As you can see, the part that was actually written by me occupies very little space.
The same program (if we ignore the fact that the main function is also treated as a function in C) in Assembly:
# Minimal program with no C runtime: _start issues the exit system call
# directly (system call number in rax, first argument in rdi).
.global _start
.text
_start: mov $60, %rax          # rax = 60, the system call number used here for exit
xor %rdi, %rdi                 # rdi = 0, the exit status
syscall                        # enter the kernel
Assembled, linked and disassembled with gcc -c demo.s && ld demo.o -o demo && objdump -d demo:
Disassembly of section .text:
0000000000401000 <_start>:
401000: 48 c7 c0 3c 00 00 00 mov $0x3c,%rax
401007: 48 31 ff xor %rdi,%rdi
40100a: 0f 05 syscall
The question is: what purpose do all these instructions serve and is there a way to generate code without them?
While I was writing the question I noticed that the C program calls exit() from the linked library whereas in Assembly I do it directly with a syscall. I don't think it is important in this case though.
gcc generates unnecessary (?) instructions
Yes, because you invoked GCC without asking for any compiler optimizations.
My recommendation: compile with
gcc -fverbose-asm -O2 -S test.c
then look inside the generated test.s assembler code.
BTW, most of the code is from crt0, which is supplied alongside gcc rather than emitted by it. Build your executable with gcc -O2 -v test.c -o testprog to understand what GCC really does. Read the documentation of GCC internals.
Since GCC is free software, you are allowed to look inside its source code and improve it. But the crt0 stuff is tricky, and operating system specific.
Consider also reading about linkers and loaders, about ELF executables, and How to write shared libraries, and the Linux Assembler HowTo.
gcc -s strips symbol names out of the final executable so you can't tell where different parts of the machine code came from.
Most of it is not from your main. To see just your own code, look at the gcc -S output (asm source), e.g. on https://godbolt.org/. See also: How to remove "noise" from GCC/clang assembly output?
Most of that is the CRT (C RunTime) startup code that eventually calls your main after initializing the standard library. (e.g. allocating memory for stdio buffers and so on.) It gets linked in regardless of how efficient your main is. e.g. compiling an empty int main(void){} with gcc -Os (optimize for size) will barely make it any smaller.
You could in theory compile with gcc -nostdlib and write your own _start that uses inline asm to make an exit system call.
See also
A Whirlwind Tutorial on Creating Really Teensy ELF Executables for Linux
How Get arguments value using inline assembly in C without Glibc? (getting command line args complicates the exercise of writing your own _start, but the answers there show how).
A C program does a lot of work before calling the main function. It has to initialize the .data and .bss segments, set up the stack, go through the constructors and destructors (yes, gcc has special attributes for such functions in C), and initialize the library.
gcc destructor and constructor functions:
void __attribute__ ((constructor)) funcname(void);
void __attribute__ ((destructor)) funcname(void);
You may have as many constructors and destructors as you wish.
Constructors are called before the call to the main function; destructors are called on exit from the program (after main terminates).
https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Function-Attributes.html#Function-Attributes
I don't understand what the function below does. From what I gather, function 4 does something like (x+x)*2, or it does something like (high-low)/2 if a condition is reached (I might be wrong on this). From reading the code, I also understood that in order to "defuse" the bomb, I need two decimal inputs, and the second one should be 14.
I'm stuck trying to figure out the first value, and trying to identify the correct formula to use in order to figure out the first value.
Function_4
00000000004010b2 <func4>:
4010b2: 55 push %rbp
4010b3: 48 89 e5 mov %rsp,%rbp
4010b6: 89 d0 mov %edx,%eax
4010b8: 29 f0 sub %esi,%eax
4010ba: 89 c1 mov %eax,%ecx
4010bc: c1 e9 1f shr $0x1f,%ecx
4010bf: 01 c8 add %ecx,%eax
4010c1: d1 f8 sar %eax
4010c3: 8d 0c 30 lea (%rax,%rsi,1),%ecx
4010c6: 39 f9 cmp %edi,%ecx
4010c8: 7e 0c jle 4010d6 <func4+0x24>
4010ca: 8d 51 ff lea -0x1(%rcx),%edx
4010cd: e8 e0 ff ff ff callq 4010b2 <func4>
4010d2: 01 c0 add %eax,%eax
4010d4: eb 15 jmp 4010eb <func4+0x39>
4010d6: b8 00 00 00 00 mov $0x0,%eax
4010db: 39 f9 cmp %edi,%ecx
4010dd: 7d 0c jge 4010eb <func4+0x39>
4010df: 8d 71 01 lea 0x1(%rcx),%esi
4010e2: e8 cb ff ff ff callq 4010b2 <func4>
4010e7: 8d 44 00 01 lea 0x1(%rax,%rax,1),%eax
4010eb: 5d pop %rbp
4010ec: c3 retq
Phase_4
00000000004010ed <phase_4>:
4010ed: 55 push %rbp
4010ee: 48 89 e5 mov %rsp,%rbp
4010f1: 48 83 ec 10 sub $0x10,%rsp
4010f5: 48 8d 4d fc lea -0x4(%rbp),%rcx
4010f9: 48 8d 55 f8 lea -0x8(%rbp),%rdx
4010fd: be 6d 2a 40 00 mov $0x402a6d,%esi
401102: b8 00 00 00 00 mov $0x0,%eax
401107: e8 a4 fb ff ff callq 400cb0 <__isoc99_sscanf@plt>
40110c: 83 f8 02 cmp $0x2,%eax
40110f: 75 0b jne 40111c <phase_4+0x2f>
401111: 8b 45 f8 mov -0x8(%rbp),%eax
401114: 83 e8 20 sub $0x20,%eax
401117: 83 f8 1e cmp $0x1e,%eax
40111a: 76 05 jbe 401121 <phase_4+0x34>
40111c: e8 b4 05 00 00 callq 4016d5 <explode_bomb>
401121: ba 3e 00 00 00 mov $0x3e,%edx
401126: be 20 00 00 00 mov $0x20,%esi
40112b: 8b 7d f8 mov -0x8(%rbp),%edi
40112e: e8 7f ff ff ff callq 4010b2 <func4>
401133: 83 f8 0e cmp $0xe,%eax
401136: 75 06 jne 40113e <phase_4+0x51>
401138: 83 7d fc 0e cmpl $0xe,-0x4(%rbp)
40113c: 74 05 je 401143 <phase_4+0x56>
40113e: e8 92 05 00 00 callq 4016d5 <explode_bomb>
401143: c9 leaveq
401144: c3 retq
Consider the code snippet below.
The entry point of the program is main as defined in C-source code. Now, normally a function starts by decreasing %rsp to reserve space for local variables. But here, the GCC compiler reserves this space in some of the added (initial) functions.
My question is, where do I look for the number of bytes of reserved variables in these GCC-specific initialization functions? In this case, the number of reserved bytes is 0x08.
Also, in what order are these initial functions called?
00000000004003c0 <_start>:
4003c0: 31 ed xor ebp,ebp
4003c2: 49 89 d1 mov r9,rdx
4003c5: 5e pop rsi
4003c6: 48 89 e2 mov rdx,rsp
4003c9: 48 83 e4 f0 and rsp,0xfffffffffffffff0
4003cd: 50 push rax
4003ce: 54 push rsp
4003cf: 49 c7 c0 a0 05 40 00 mov r8,0x4005a0
4003d6: 48 c7 c1 30 05 40 00 mov rcx,0x400530
4003dd: 48 c7 c7 c0 04 40 00 mov rdi,0x4004c0
4003e4: e8 b7 ff ff ff call 4003a0 <__libc_start_main#plt>
4003e9: f4 hlt
4003ea: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
00000000004003f0 <deregister_tm_clones>:
4003f0: b8 37 10 60 00 mov eax,0x601037
4003f5: 55 push rbp
4003f6: 48 2d 30 10 60 00 sub rax,0x601030
4003fc: 48 83 f8 0e cmp rax,0xe
400400: 48 89 e5 mov rbp,rsp
400403: 76 1b jbe 400420 <deregister_tm_clones+0x30>
400405: b8 00 00 00 00 mov eax,0x0
40040a: 48 85 c0 test rax,rax
40040d: 74 11 je 400420 <deregister_tm_clones+0x30>
40040f: 5d pop rbp
400410: bf 30 10 60 00 mov edi,0x601030
400415: ff e0 jmp rax
400417: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0]
40041e: 00 00
400420: 5d pop rbp
400421: c3 ret
400422: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
400426: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40042d: 00 00 00
0000000000400430 <register_tm_clones>:
400430: be 30 10 60 00 mov esi,0x601030
400435: 55 push rbp
400436: 48 81 ee 30 10 60 00 sub rsi,0x601030
40043d: 48 c1 fe 03 sar rsi,0x3
400441: 48 89 e5 mov rbp,rsp
400444: 48 89 f0 mov rax,rsi
400447: 48 c1 e8 3f shr rax,0x3f
40044b: 48 01 c6 add rsi,rax
40044e: 48 d1 fe sar rsi,1
400451: 74 15 je 400468 <register_tm_clones+0x38>
400453: b8 00 00 00 00 mov eax,0x0
400458: 48 85 c0 test rax,rax
40045b: 74 0b je 400468 <register_tm_clones+0x38>
40045d: 5d pop rbp
40045e: bf 30 10 60 00 mov edi,0x601030
400463: ff e0 jmp rax
400465: 0f 1f 00 nop DWORD PTR [rax]
400468: 5d pop rbp
400469: c3 ret
40046a: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
0000000000400470 <__do_global_dtors_aux>:
400470: 80 3d b9 0b 20 00 00 cmp BYTE PTR [rip+0x200bb9],0x0 # 601030 <__TMC_END__>
400477: 75 11 jne 40048a <__do_global_dtors_aux+0x1a>
400479: 55 push rbp
40047a: 48 89 e5 mov rbp,rsp
40047d: e8 6e ff ff ff call 4003f0 <deregister_tm_clones>
400482: 5d pop rbp
400483: c6 05 a6 0b 20 00 01 mov BYTE PTR [rip+0x200ba6],0x1 # 601030 <__TMC_END__>
40048a: f3 c3 repz ret
40048c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
0000000000400490 <frame_dummy>:
400490: bf 20 0e 60 00 mov edi,0x600e20
400495: 48 83 3f 00 cmp QWORD PTR [rdi],0x0
400499: 75 05 jne 4004a0 <frame_dummy+0x10>
40049b: eb 93 jmp 400430 <register_tm_clones>
40049d: 0f 1f 00 nop DWORD PTR [rax]
4004a0: b8 00 00 00 00 mov eax,0x0
4004a5: 48 85 c0 test rax,rax
4004a8: 74 f1 je 40049b <frame_dummy+0xb>
4004aa: 55 push rbp
4004ab: 48 89 e5 mov rbp,rsp
4004ae: ff d0 call rax
4004b0: 5d pop rbp
4004b1: e9 7a ff ff ff jmp 400430 <register_tm_clones>
4004b6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
4004bd: 00 00 00
00000000004004c0 <main>:
4004c0: 55 push rbp
4004c1: 48 89 e5 mov rbp,rsp
4004c4: c7 45 f8 00 00 00 00 mov DWORD PTR [rbp-0x8],0x0
4004cb: c7 45 fc 01 00 00 00 mov DWORD PTR [rbp-0x4],0x1
4004d2: eb 46 jmp 40051a <.cend>
4004d4: 66 66 66 2e 0f 1f 84 data16 data16 nop WORD PTR cs:[rax+rax*1+0x0]
4004db: 00 00 00 00 00
4004e0: ff 05 4e 0b 20 00 inc DWORD PTR [rip+0x200b4e] # 601034 <sum>
4004e6: 50 push rax
4004e7: 53 push rbx
4004e8: 56 push rsi
4004e9: 48 31 c0 xor rax,rax
4004ec: 48 c7 c6 14 05 40 00 mov rsi,0x400514
00000000004004f3 <.cloop>:
4004f3: 48 0f b6 1e movzx rbx,BYTE PTR [rsi]
4004f7: 48 31 d8 xor rax,rbx
4004fa: 48 ff c6 inc rsi
4004fd: 48 81 fe 1a 05 40 00 cmp rsi,0x40051a
400504: 75 ed jne 4004f3 <.cloop>
400506: 48 83 f8 00 cmp rax,0x0
40050a: 74 05 je 400511 <.restore>
40050c: 48 31 c0 xor rax,rax
40050f: ff d0 call rax
0000000000400511 <.restore>:
400511: 5e pop rsi
400512: 5b pop rbx
400513: 58 pop rax
0000000000400514 <.cstart>:
400514: eb 01 jmp 400517 <.end>
0000000000400516 <.cslot>:
400516: ac lods al,BYTE PTR ds:[rsi]
0000000000400517 <.end>:
400517: ff 45 fc inc DWORD PTR [rbp-0x4]
000000000040051a <.cend>:
40051a: 83 7d fc 1e cmp DWORD PTR [rbp-0x4],0x1e
40051e: 7e c0 jle 4004e0 <main+0x20>
400520: 8b 05 0e 0b 20 00 mov eax,DWORD PTR [rip+0x200b0e] # 601034 <sum>
400526: 5d pop rbp
400527: c3 ret
400528: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40052f: 00
0000000000400530 <__libc_csu_init>:
400530: 41 57 push r15
400532: 41 56 push r14
400534: 41 89 ff mov r15d,edi
400537: 41 55 push r13
400539: 41 54 push r12
40053b: 4c 8d 25 ce 08 20 00 lea r12,[rip+0x2008ce] # 600e10 <__frame_dummy_init_array_entry>
400542: 55 push rbp
400543: 48 8d 2d ce 08 20 00 lea rbp,[rip+0x2008ce] # 600e18 <__init_array_end>
40054a: 53 push rbx
40054b: 49 89 f6 mov r14,rsi
40054e: 49 89 d5 mov r13,rdx
400551: 4c 29 e5 sub rbp,r12
400554: 48 83 ec 08 sub rsp,0x8
400558: 48 c1 fd 03 sar rbp,0x3
40055c: e8 0f fe ff ff call 400370 <_init>
400561: 48 85 ed test rbp,rbp
400564: 74 20 je 400586 <__libc_csu_init+0x56>
400566: 31 db xor ebx,ebx
400568: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40056f: 00
400570: 4c 89 ea mov rdx,r13
400573: 4c 89 f6 mov rsi,r14
400576: 44 89 ff mov edi,r15d
400579: 41 ff 14 dc call QWORD PTR [r12+rbx*8]
40057d: 48 83 c3 01 add rbx,0x1
400581: 48 39 eb cmp rbx,rbp
400584: 75 ea jne 400570 <__libc_csu_init+0x40>
400586: 48 83 c4 08 add rsp,0x8
40058a: 5b pop rbx
40058b: 5d pop rbp
40058c: 41 5c pop r12
40058e: 41 5d pop r13
400590: 41 5e pop r14
400592: 41 5f pop r15
400594: c3 ret
400595: 90 nop
400596: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40059d: 00 00 00
00000000004005a0 <__libc_csu_fini>:
4005a0: f3 c3 repz ret
Disassembly of section .fini:
00000000004005a4 <_fini>:
4005a4: 48 83 ec 08 sub rsp,0x8
4005a8: 48 83 c4 08 add rsp,0x8
4005ac: c3
The following is my C file:
/* Minimal program: main does nothing except return the constant 36. */
int main()
{
return 36;
}
It contains only a return statement. But if I use the size command, it shows
output like this:
mohanraj@ltsp63:~/Development/chap8$ size a.out
text data bss dec hex filename
1056 252 8 1316 524 a.out
mohanraj@ltsp63:~/Development/chap8$
My program does not contain any global variables or uninitialized data, yet the output shows that the data segment has 252 bytes and the bss has 8 bytes. Why is the output like this? What do 252 and 8 refer to?
Size Command
First see the definition of each column:
text - The actual machine instructions that your CPU is going to execute. Linux allows this segment to be shared between processes.
data - All initialized global and static variables declared in a program (e.g., a file-scope float salary=123.45;).
bss - The BSS holds uninitialized (zero-filled) global and static data, such as arrays that you have not assigned any values to, or pointers left null.
As Blue Moon said, on Linux execution starts by calling the _start() function, which sets up the environment. Every C program is linked with hidden startup code and libraries that depend on the compiler you are using. This code sets up global parameters and exit handling, and once that configuration is complete it finally calls your main() function.
AFAIK there's no way to see how your code looks once it is wrapped with this setup code and the _start() function. But I can show you that even your code carries more information than you thought, the closer to the hardware we get.
Hint:
Type readelf -a a.out to see how much information your executable is really carrying.
What is inside?
Do not compare the code in your source file with the size of the executable file; the size depends on the OS, the compiler, and the libraries used.
In my example, with exactly the same code, SIZE returns:
eryk#eryk-pc:~$ gcc a.c
eryk#eryk-pc:~$ size a.out
text data bss dec hex filename
1033 276 4 1313 521 a.out
Let's see what is inside...
eryk#eryk-pc:~$ gcc -S a.c
This will run the preprocessor over a.c, perform the initial compilation and then stop before the assembler is run.
eryk#eryk-pc:~$ cat a.s
# a.s -- GCC output for `int main() { return 36; }` (32-bit x86, AT&T syntax).
# main: sets up and tears down an %ebp frame, then returns with 36 in %eax.
# Touches: %eax, %ebp (saved and restored), %esp.
        .file   "a.c"
        .text
        .globl  main
        .type   main, @function         # '@' is required: '#' begins a comment in x86 gas
main:
.LFB0:
        .cfi_startproc
        pushl   %ebp                    # save the caller's frame pointer
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp              # establish this function's frame
        .cfi_def_cfa_register 5
        movl    $36, %eax               # the value returned by main
        popl    %ebp                    # restore the caller's frame pointer
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
.LFE0:
        .size   main, .-main
        .ident  "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
        .section        .note.GNU-stack,"",@progbits
Then look at the assembly code
eryk#eryk-pc:~$ objdump -d -M intel -S a.out
a.out: file format elf32-i386
Disassembly of section .init:
08048294 <_init>:
8048294: 53 push ebx
8048295: 83 ec 08 sub esp,0x8
8048298: e8 83 00 00 00 call 8048320 <__x86.get_pc_thunk.bx>
804829d: 81 c3 63 1d 00 00 add ebx,0x1d63
80482a3: 8b 83 fc ff ff ff mov eax,DWORD PTR [ebx-0x4]
80482a9: 85 c0 test eax,eax
80482ab: 74 05 je 80482b2 <_init+0x1e>
80482ad: e8 1e 00 00 00 call 80482d0 <__gmon_start__@plt>
80482b2: 83 c4 08 add esp,0x8
80482b5: 5b pop ebx
80482b6: c3 ret
Disassembly of section .plt:
080482c0 <__gmon_start__@plt-0x10>:
80482c0: ff 35 04 a0 04 08 push DWORD PTR ds:0x804a004
80482c6: ff 25 08 a0 04 08 jmp DWORD PTR ds:0x804a008
80482cc: 00 00 add BYTE PTR [eax],al
...
080482d0 <__gmon_start__@plt>:
80482d0: ff 25 0c a0 04 08 jmp DWORD PTR ds:0x804a00c
80482d6: 68 00 00 00 00 push 0x0
80482db: e9 e0 ff ff ff jmp 80482c0 <_init+0x2c>
080482e0 <__libc_start_main@plt>:
80482e0: ff 25 10 a0 04 08 jmp DWORD PTR ds:0x804a010
80482e6: 68 08 00 00 00 push 0x8
80482eb: e9 d0 ff ff ff jmp 80482c0 <_init+0x2c>
Disassembly of section .text:
080482f0 <_start>:
80482f0: 31 ed xor ebp,ebp
80482f2: 5e pop esi
80482f3: 89 e1 mov ecx,esp
80482f5: 83 e4 f0 and esp,0xfffffff0
80482f8: 50 push eax
80482f9: 54 push esp
80482fa: 52 push edx
80482fb: 68 70 84 04 08 push 0x8048470
8048300: 68 00 84 04 08 push 0x8048400
8048305: 51 push ecx
8048306: 56 push esi
8048307: 68 ed 83 04 08 push 0x80483ed
804830c: e8 cf ff ff ff call 80482e0 <__libc_start_main@plt>
8048311: f4 hlt
8048312: 66 90 xchg ax,ax
8048314: 66 90 xchg ax,ax
8048316: 66 90 xchg ax,ax
8048318: 66 90 xchg ax,ax
804831a: 66 90 xchg ax,ax
804831c: 66 90 xchg ax,ax
804831e: 66 90 xchg ax,ax
08048320 <__x86.get_pc_thunk.bx>:
8048320: 8b 1c 24 mov ebx,DWORD PTR [esp]
8048323: c3 ret
8048324: 66 90 xchg ax,ax
8048326: 66 90 xchg ax,ax
8048328: 66 90 xchg ax,ax
804832a: 66 90 xchg ax,ax
804832c: 66 90 xchg ax,ax
804832e: 66 90 xchg ax,ax
08048330 <deregister_tm_clones>:
8048330: b8 1f a0 04 08 mov eax,0x804a01f
8048335: 2d 1c a0 04 08 sub eax,0x804a01c
804833a: 83 f8 06 cmp eax,0x6
804833d: 77 01 ja 8048340 <deregister_tm_clones+0x10>
804833f: c3 ret
8048340: b8 00 00 00 00 mov eax,0x0
8048345: 85 c0 test eax,eax
8048347: 74 f6 je 804833f <deregister_tm_clones+0xf>
8048349: 55 push ebp
804834a: 89 e5 mov ebp,esp
804834c: 83 ec 18 sub esp,0x18
804834f: c7 04 24 1c a0 04 08 mov DWORD PTR [esp],0x804a01c
8048356: ff d0 call eax
8048358: c9 leave
8048359: c3 ret
804835a: 8d b6 00 00 00 00 lea esi,[esi+0x0]
08048360 <register_tm_clones>:
8048360: b8 1c a0 04 08 mov eax,0x804a01c
8048365: 2d 1c a0 04 08 sub eax,0x804a01c
804836a: c1 f8 02 sar eax,0x2
804836d: 89 c2 mov edx,eax
804836f: c1 ea 1f shr edx,0x1f
8048372: 01 d0 add eax,edx
8048374: d1 f8 sar eax,1
8048376: 75 01 jne 8048379 <register_tm_clones+0x19>
8048378: c3 ret
8048379: ba 00 00 00 00 mov edx,0x0
804837e: 85 d2 test edx,edx
8048380: 74 f6 je 8048378 <register_tm_clones+0x18>
8048382: 55 push ebp
8048383: 89 e5 mov ebp,esp
8048385: 83 ec 18 sub esp,0x18
8048388: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
804838c: c7 04 24 1c a0 04 08 mov DWORD PTR [esp],0x804a01c
8048393: ff d2 call edx
8048395: c9 leave
8048396: c3 ret
8048397: 89 f6 mov esi,esi
8048399: 8d bc 27 00 00 00 00 lea edi,[edi+eiz*1+0x0]
080483a0 <__do_global_dtors_aux>:
80483a0: 80 3d 1c a0 04 08 00 cmp BYTE PTR ds:0x804a01c,0x0
80483a7: 75 13 jne 80483bc <__do_global_dtors_aux+0x1c>
80483a9: 55 push ebp
80483aa: 89 e5 mov ebp,esp
80483ac: 83 ec 08 sub esp,0x8
80483af: e8 7c ff ff ff call 8048330 <deregister_tm_clones>
80483b4: c6 05 1c a0 04 08 01 mov BYTE PTR ds:0x804a01c,0x1
80483bb: c9 leave
80483bc: f3 c3 repz ret
80483be: 66 90 xchg ax,ax
080483c0 <frame_dummy>:
80483c0: a1 10 9f 04 08 mov eax,ds:0x8049f10
80483c5: 85 c0 test eax,eax
80483c7: 74 1f je 80483e8 <frame_dummy+0x28>
80483c9: b8 00 00 00 00 mov eax,0x0
80483ce: 85 c0 test eax,eax
80483d0: 74 16 je 80483e8 <frame_dummy+0x28>
80483d2: 55 push ebp
80483d3: 89 e5 mov ebp,esp
80483d5: 83 ec 18 sub esp,0x18
80483d8: c7 04 24 10 9f 04 08 mov DWORD PTR [esp],0x8049f10
80483df: ff d0 call eax
80483e1: c9 leave
80483e2: e9 79 ff ff ff jmp 8048360 <register_tm_clones>
80483e7: 90 nop
80483e8: e9 73 ff ff ff jmp 8048360 <register_tm_clones>
080483ed <main>:
80483ed: 55 push ebp
80483ee: 89 e5 mov ebp,esp
80483f0: b8 24 00 00 00 mov eax,0x24
80483f5: 5d pop ebp
80483f6: c3 ret
80483f7: 66 90 xchg ax,ax
80483f9: 66 90 xchg ax,ax
80483fb: 66 90 xchg ax,ax
80483fd: 66 90 xchg ax,ax
80483ff: 90 nop
08048400 <__libc_csu_init>:
8048400: 55 push ebp
8048401: 57 push edi
8048402: 31 ff xor edi,edi
8048404: 56 push esi
8048405: 53 push ebx
8048406: e8 15 ff ff ff call 8048320 <__x86.get_pc_thunk.bx>
804840b: 81 c3 f5 1b 00 00 add ebx,0x1bf5
8048411: 83 ec 1c sub esp,0x1c
8048414: 8b 6c 24 30 mov ebp,DWORD PTR [esp+0x30]
8048418: 8d b3 0c ff ff ff lea esi,[ebx-0xf4]
804841e: e8 71 fe ff ff call 8048294 <_init>
8048423: 8d 83 08 ff ff ff lea eax,[ebx-0xf8]
8048429: 29 c6 sub esi,eax
804842b: c1 fe 02 sar esi,0x2
804842e: 85 f6 test esi,esi
8048430: 74 27 je 8048459 <__libc_csu_init+0x59>
8048432: 8d b6 00 00 00 00 lea esi,[esi+0x0]
8048438: 8b 44 24 38 mov eax,DWORD PTR [esp+0x38]
804843c: 89 2c 24 mov DWORD PTR [esp],ebp
804843f: 89 44 24 08 mov DWORD PTR [esp+0x8],eax
8048443: 8b 44 24 34 mov eax,DWORD PTR [esp+0x34]
8048447: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
804844b: ff 94 bb 08 ff ff ff call DWORD PTR [ebx+edi*4-0xf8]
8048452: 83 c7 01 add edi,0x1
8048455: 39 f7 cmp edi,esi
8048457: 75 df jne 8048438 <__libc_csu_init+0x38>
8048459: 83 c4 1c add esp,0x1c
804845c: 5b pop ebx
804845d: 5e pop esi
804845e: 5f pop edi
804845f: 5d pop ebp
8048460: c3 ret
8048461: eb 0d jmp 8048470 <__libc_csu_fini>
8048463: 90 nop
8048464: 90 nop
8048465: 90 nop
8048466: 90 nop
8048467: 90 nop
8048468: 90 nop
8048469: 90 nop
804846a: 90 nop
804846b: 90 nop
804846c: 90 nop
804846d: 90 nop
804846e: 90 nop
804846f: 90 nop
08048470 <__libc_csu_fini>:
8048470: f3 c3 repz ret
Disassembly of section .fini:
08048474 <_fini>:
8048474: 53 push ebx
8048475: 83 ec 08 sub esp,0x8
8048478: e8 a3 fe ff ff call 8048320 <__x86.get_pc_thunk.bx>
804847d: 81 c3 83 1b 00 00 add ebx,0x1b83
8048483: 83 c4 08 add esp,0x8
8048486: 5b pop ebx
8048487: c3 ret
The next step would be converting the above code to binary (0/1) notation.
As you can see, even a simple C program contains complicated operations the closer your code gets to the hardware. I hope I have explained why the executable file is bigger than you thought. If you have any doubts, feel free to comment on my post and I will edit my answer.
I wrote some very simple C code, as follows:
/* Input list for max(); the trailing 0 is the terminator that max()'s loop stops on. */
int data_items[] = {3,67,32,4,89,6,34,2,9,0};
/*
 * Return the largest value in the 0-terminated int array starting at pt.
 * The scan stops at the first element equal to 0; that terminator is never
 * compared against the running maximum. If the first element is already 0,
 * the loop body does not run and 0 is returned.
 */
int max(int* pt)
{
/* Seed the running maximum with the first element. */
int val = *pt;
while(*pt != 0)
{
if (*pt > val)
{
val = *pt;
}
/* Advance to the next int in the array. */
++pt;
}
return val;
}
/* Calls max() on data_items, ignores the returned maximum, and returns 0. */
int main()
{
max(data_items);
return 0;
}
then I compiled it with gcc:
gcc main.c
and then disassembled it with:
objdump -d a.out
Finally I got the assembly code:
a.out: file format elf64-x86-64
Disassembly of section .init:
00000000004003a8 <_init>:
4003a8: 48 83 ec 08 sub $0x8,%rsp
4003ac: 48 8b 05 45 0c 20 00 mov 0x200c45(%rip),%rax # 600ff8 <_DYNAMIC+0x1d0>
4003b3: 48 85 c0 test %rax,%rax
4003b6: 74 05 je 4003bd <_init+0x15>
4003b8: e8 33 00 00 00 callq 4003f0 <__gmon_start__@plt>
4003bd: 48 83 c4 08 add $0x8,%rsp
4003c1: c3 retq
Disassembly of section .plt:
00000000004003d0 <__libc_start_main@plt-0x10>:
4003d0: ff 35 32 0c 20 00 pushq 0x200c32(%rip) # 601008 <_GLOBAL_OFFSET_TABLE_+0x8>
4003d6: ff 25 34 0c 20 00 jmpq *0x200c34(%rip) # 601010 <_GLOBAL_OFFSET_TABLE_+0x10>
4003dc: 0f 1f 40 00 nopl 0x0(%rax)
00000000004003e0 <__libc_start_main@plt>:
4003e0: ff 25 32 0c 20 00 jmpq *0x200c32(%rip) # 601018 <_GLOBAL_OFFSET_TABLE_+0x18>
4003e6: 68 00 00 00 00 pushq $0x0
4003eb: e9 e0 ff ff ff jmpq 4003d0 <_init+0x28>
00000000004003f0 <__gmon_start__@plt>:
4003f0: ff 25 2a 0c 20 00 jmpq *0x200c2a(%rip) # 601020 <_GLOBAL_OFFSET_TABLE_+0x20>
4003f6: 68 01 00 00 00 pushq $0x1
4003fb: e9 d0 ff ff ff jmpq 4003d0 <_init+0x28>
Disassembly of section .text:
0000000000400400 <_start>:
400400: 31 ed xor %ebp,%ebp
400402: 49 89 d1 mov %rdx,%r9
400405: 5e pop %rsi
400406: 48 89 e2 mov %rsp,%rdx
400409: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
40040d: 50 push %rax
40040e: 54 push %rsp
40040f: 49 c7 c0 b0 05 40 00 mov $0x4005b0,%r8
400416: 48 c7 c1 40 05 40 00 mov $0x400540,%rcx
40041d: 48 c7 c7 28 05 40 00 mov $0x400528,%rdi
400424: e8 b7 ff ff ff callq 4003e0 <__libc_start_main@plt>
400429: f4 hlt
40042a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400430 <deregister_tm_clones>:
400430: b8 8f 10 60 00 mov $0x60108f,%eax
400435: 55 push %rbp
400436: 48 2d 88 10 60 00 sub $0x601088,%rax
40043c: 48 83 f8 0e cmp $0xe,%rax
400440: 48 89 e5 mov %rsp,%rbp
400443: 77 02 ja 400447 <deregister_tm_clones+0x17>
400445: 5d pop %rbp
400446: c3 retq
400447: b8 00 00 00 00 mov $0x0,%eax
40044c: 48 85 c0 test %rax,%rax
40044f: 74 f4 je 400445 <deregister_tm_clones+0x15>
400451: 5d pop %rbp
400452: bf 88 10 60 00 mov $0x601088,%edi
400457: ff e0 jmpq *%rax
400459: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
0000000000400460 <register_tm_clones>:
400460: b8 88 10 60 00 mov $0x601088,%eax
400465: 55 push %rbp
400466: 48 2d 88 10 60 00 sub $0x601088,%rax
40046c: 48 c1 f8 03 sar $0x3,%rax
400470: 48 89 e5 mov %rsp,%rbp
400473: 48 89 c2 mov %rax,%rdx
400476: 48 c1 ea 3f shr $0x3f,%rdx
40047a: 48 01 d0 add %rdx,%rax
40047d: 48 d1 f8 sar %rax
400480: 75 02 jne 400484 <register_tm_clones+0x24>
400482: 5d pop %rbp
400483: c3 retq
400484: ba 00 00 00 00 mov $0x0,%edx
400489: 48 85 d2 test %rdx,%rdx
40048c: 74 f4 je 400482 <register_tm_clones+0x22>
40048e: 5d pop %rbp
40048f: 48 89 c6 mov %rax,%rsi
400492: bf 88 10 60 00 mov $0x601088,%edi
400497: ff e2 jmpq *%rdx
400499: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
00000000004004a0 <__do_global_dtors_aux>:
4004a0: 80 3d e1 0b 20 00 00 cmpb $0x0,0x200be1(%rip) # 601088 <__TMC_END__>
4004a7: 75 11 jne 4004ba <__do_global_dtors_aux+0x1a>
4004a9: 55 push %rbp
4004aa: 48 89 e5 mov %rsp,%rbp
4004ad: e8 7e ff ff ff callq 400430 <deregister_tm_clones>
4004b2: 5d pop %rbp
4004b3: c6 05 ce 0b 20 00 01 movb $0x1,0x200bce(%rip) # 601088 <__TMC_END__>
4004ba: f3 c3 repz retq
4004bc: 0f 1f 40 00 nopl 0x0(%rax)
00000000004004c0 <frame_dummy>:
4004c0: 48 83 3d 58 09 20 00 cmpq $0x0,0x200958(%rip) # 600e20 <__JCR_END__>
4004c7: 00
4004c8: 74 1e je 4004e8 <frame_dummy+0x28>
4004ca: b8 00 00 00 00 mov $0x0,%eax
4004cf: 48 85 c0 test %rax,%rax
4004d2: 74 14 je 4004e8 <frame_dummy+0x28>
4004d4: 55 push %rbp
4004d5: bf 20 0e 60 00 mov $0x600e20,%edi
4004da: 48 89 e5 mov %rsp,%rbp
4004dd: ff d0 callq *%rax
4004df: 5d pop %rbp
4004e0: e9 7b ff ff ff jmpq 400460 <register_tm_clones>
4004e5: 0f 1f 00 nopl (%rax)
4004e8: e9 73 ff ff ff jmpq 400460 <register_tm_clones>
00000000004004ed <max>:
4004ed: 55 push %rbp
4004ee: 48 89 e5 mov %rsp,%rbp
4004f1: 48 89 7d e8 mov %rdi,-0x18(%rbp)
4004f5: 48 8b 45 e8 mov -0x18(%rbp),%rax
4004f9: 8b 00 mov (%rax),%eax
4004fb: 89 45 fc mov %eax,-0x4(%rbp)
4004fe: eb 19 jmp 400519 <max+0x2c>
400500: 48 8b 45 e8 mov -0x18(%rbp),%rax
400504: 8b 00 mov (%rax),%eax
400506: 3b 45 fc cmp -0x4(%rbp),%eax
400509: 7e 09 jle 400514 <max+0x27>
40050b: 48 8b 45 e8 mov -0x18(%rbp),%rax
40050f: 8b 00 mov (%rax),%eax
400511: 89 45 fc mov %eax,-0x4(%rbp)
400514: 48 83 45 e8 04 addq $0x4,-0x18(%rbp)
400519: 48 8b 45 e8 mov -0x18(%rbp),%rax
40051d: 8b 00 mov (%rax),%eax
40051f: 85 c0 test %eax,%eax
400521: 75 dd jne 400500 <max+0x13>
400523: 8b 45 fc mov -0x4(%rbp),%eax
400526: 5d pop %rbp
400527: c3 retq
0000000000400528 <main>:
400528: 55 push %rbp
400529: 48 89 e5 mov %rsp,%rbp
40052c: bf 60 10 60 00 mov $0x601060,%edi
400531: e8 b7 ff ff ff callq 4004ed <max>
400536: b8 00 00 00 00 mov $0x0,%eax
40053b: 5d pop %rbp
40053c: c3 retq
40053d: 0f 1f 00 nopl (%rax)
0000000000400540 <__libc_csu_init>:
400540: 41 57 push %r15
400542: 41 89 ff mov %edi,%r15d
400545: 41 56 push %r14
400547: 49 89 f6 mov %rsi,%r14
40054a: 41 55 push %r13
40054c: 49 89 d5 mov %rdx,%r13
40054f: 41 54 push %r12
400551: 4c 8d 25 b8 08 20 00 lea 0x2008b8(%rip),%r12 # 600e10 <__frame_dummy_init_array_entry>
400558: 55 push %rbp
400559: 48 8d 2d b8 08 20 00 lea 0x2008b8(%rip),%rbp # 600e18 <__init_array_end>
400560: 53 push %rbx
400561: 4c 29 e5 sub %r12,%rbp
400564: 31 db xor %ebx,%ebx
400566: 48 c1 fd 03 sar $0x3,%rbp
40056a: 48 83 ec 08 sub $0x8,%rsp
40056e: e8 35 fe ff ff callq 4003a8 <_init>
400573: 48 85 ed test %rbp,%rbp
400576: 74 1e je 400596 <__libc_csu_init+0x56>
400578: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
40057f: 00
400580: 4c 89 ea mov %r13,%rdx
400583: 4c 89 f6 mov %r14,%rsi
400586: 44 89 ff mov %r15d,%edi
400589: 41 ff 14 dc callq *(%r12,%rbx,8)
40058d: 48 83 c3 01 add $0x1,%rbx
400591: 48 39 eb cmp %rbp,%rbx
400594: 75 ea jne 400580 <__libc_csu_init+0x40>
400596: 48 83 c4 08 add $0x8,%rsp
40059a: 5b pop %rbx
40059b: 5d pop %rbp
40059c: 41 5c pop %r12
40059e: 41 5d pop %r13
4005a0: 41 5e pop %r14
4005a2: 41 5f pop %r15
4005a4: c3 retq
4005a5: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1)
4005ac: 00 00 00 00
00000000004005b0 <__libc_csu_fini>:
4005b0: f3 c3 repz retq
Disassembly of section .fini:
00000000004005b4 <_fini>:
4005b4: 48 83 ec 08 sub $0x8,%rsp
4005b8: 48 83 c4 08 add $0x8,%rsp
4005bc: c3 retq
My question is that, in main function:
40052c: bf 60 10 60 00 mov $0x601060,%edi
The address $0x601060 should be the address of the array data_items in the C code. But I cannot find the values of the data_items array in the assembly code; where are they?
By the way, in the max function code:
4004f1: 48 89 7d e8 mov %rdi,-0x18(%rbp)
I think the stack slot should be:
mov %rdi, -0x8(%rbp)
Why does the compiler leave a hole in the stack?
My system is Ubuntu 14.04 LTS.
objdump -d only disassembles sections of the executable where there's supposed to be code.
Use objdump -D to disassemble all sections, and you'll find this:
Disassembly of section .data:
08049760 <__data_start>:
...
08049780 <data_items>:
8049780: 03 00 add (%eax),%eax
8049782: 00 00 add %al,(%eax)
8049784: 43 inc %ebx
8049785: 00 00 add %al,(%eax)
8049787: 00 20 add %ah,(%eax)
8049789: 00 00 add %al,(%eax)
804978b: 00 04 00 add %al,(%eax,%eax,1)
804978e: 00 00 add %al,(%eax)
That's your array. (objdump will try to disassemble it as if it were machine code, so the assembly isn't going to make sense.)
If you run nm a.out, you will see that 0x601060 is the address of data_items.
When it comes to runtime disassembly, gdb itself can be very handy:
(gdb) break main
Breakpoint 1 at 0x4004f3
(gdb) break max
Breakpoint 2 at 0x4004b8
(gdb) run
Starting program: /home/dtarcatu/workspace/ctest/a.out
Breakpoint 1, 0x00000000004004f3 in main ()
(gdb) disas
Dump of assembler code for function main:
0x00000000004004ef <+0>: push %rbp
0x00000000004004f0 <+1>: mov %rsp,%rbp
=> 0x00000000004004f3 <+4>: mov $0x601040,%edi
0x00000000004004f8 <+9>: callq 0x4004b4 <max>
0x00000000004004fd <+14>: mov $0x0,%eax
0x0000000000400502 <+19>: pop %rbp
0x0000000000400503 <+20>: retq
End of assembler dump.
(gdb) x 0x601040
0x601040 <data_items>: 0x00000003
(gdb) x /10d 0x601040
0x601040 <data_items>: 3 67 32 4
0x601050 <data_items+16>: 89 6 34 2
0x601060 <data_items+32>: 9 0
Now, regarding your stack discipline dilemma, I'm not sure I can come up with a very good answer, but I think it's related to the x86-64 red zone optimization.
(gdb) c
Continuing.
Breakpoint 2, 0x00000000004004b8 in max ()
(gdb) disas
Dump of assembler code for function max:
0x00000000004004b4 <+0>: push %rbp
0x00000000004004b5 <+1>: mov %rsp,%rbp
=> 0x00000000004004b8 <+4>: mov %rdi,-0x18(%rbp)
0x00000000004004bc <+8>: mov -0x18(%rbp),%rax
0x00000000004004c0 <+12>: mov (%rax),%eax
0x00000000004004c2 <+14>: mov %eax,-0x4(%rbp)
0x00000000004004c5 <+17>: jmp 0x4004e0 <max+44>
0x00000000004004c7 <+19>: mov -0x18(%rbp),%rax
0x00000000004004cb <+23>: mov (%rax),%eax
0x00000000004004cd <+25>: cmp -0x4(%rbp),%eax
0x00000000004004d0 <+28>: jle 0x4004db <max+39>
0x00000000004004d2 <+30>: mov -0x18(%rbp),%rax
0x00000000004004d6 <+34>: mov (%rax),%eax
0x00000000004004d8 <+36>: mov %eax,-0x4(%rbp)
0x00000000004004db <+39>: addq $0x4,-0x18(%rbp)
0x00000000004004e0 <+44>: mov -0x18(%rbp),%rax
0x00000000004004e4 <+48>: mov (%rax),%eax
0x00000000004004e6 <+50>: test %eax,%eax
0x00000000004004e8 <+52>: jne 0x4004c7 <max+19>
0x00000000004004ea <+54>: mov -0x4(%rbp),%eax
0x00000000004004ed <+57>: pop %rbp
0x00000000004004ee <+58>: retq
End of assembler dump.
(gdb) ni
0x00000000004004bc in max ()
(gdb) print $rbp
$1 = (void *) 0x7fffffffdf10
(gdb) print $rsp
$2 = (void *) 0x7fffffffdf10
The -0x18(%rbp) location is definitely inside the red zone. I'm not sure how the compiler uses that, but it can probably do whatever the heck it wants in there. Your local int seems to be stored at -0x4(%rbp) later on, so I guess -0x18(%rbp) is just some sort of temporary buffer.
You could also ask the compiler to output its emitted assembler code. Notice that there is no observable side effect in your program, so the compiler could optimize it to a nop, or compute the max at compile time, etc.
However, compiling your code (with GCC 4.9.1, Debian/x86-64) using
gcc -fverbose-asm -O -S main.c
I'm getting a file main.s containing (with several lines before and after) :
# int max(int *pt) -- GCC -O output, x86-64, AT&T syntax.
# In:    %rdi = pt, pointer to an int array terminated by a 0 element
# Out:   %eax = running maximum (val)
# Uses:  %edx = element currently being examined; %rdi is advanced in place
        .globl  max
        .type   max, @function          # '@' is required: '#' begins a comment in x86 gas
max:
.LFB0:
        .file 1 "main.c"
        .loc 1 4 0
        .cfi_startproc
.LVL0:
        .loc 1 5 0
        movl    (%rdi), %eax    # *pt_5(D), val
.LVL1:
        .loc 1 6 0
        # First element is 0: skip the loop and return that 0.
        testl   %eax, %eax      # val
        je      .L2     #,
        movl    %eax, %edx      # val, val
.L3:
        # eax = (eax < edx) ? edx : eax, signed compare, no branch.
        cmpl    %edx, %eax      # val, val
        cmovl   %edx, %eax      # val,, val, val
.LVL2:
        .loc 1 12 0
        addq    $4, %rdi        #, pt
.LVL3:
        .loc 1 6 0
        # Load the next element; a 0 ends the loop.
        movl    (%rdi), %edx    # MEM[base: pt_8, offset: 0B], val
        testl   %edx, %edx      # val
        jne     .L3     #,
.L2:
        .loc 1 15 0
        rep ret
        .cfi_endproc
.LFE0:
        .size   max, .-max
# int main(void) -- GCC -O output, x86-64, AT&T syntax.
# Calls max with the address of data_items in %edi, ignores the value
# max leaves in %eax, and returns 0.
        .globl  main
        .type   main, @function         # '@' is required: '#' begins a comment in x86 gas
main:
.LFB1:
        .loc 1 18 0
        .cfi_startproc
        .loc 1 20 0
        # Address of data_items as a 32-bit absolute immediate.
        movl    $data_items, %edi       #,
        call    max     #
.LVL4:
        .loc 1 23 0
        # Overwrite max's result with main's return value 0.
        movl    $0, %eax        #,
        ret
        .cfi_endproc
.LFE1:
        .size   main, .-main
# data_items: ten 32-bit ints (40 bytes), 32-byte aligned, placed in .data.
# The final 0 is the terminator element.
        .globl  data_items
        .data
        .align 32
        .type   data_items, @object     # '@' is required: '#' begins a comment in x86 gas
        .size   data_items, 40
data_items:
        .long   3
        .long   67
        .long   32
        .long   4
        .long   89
        .long   6
        .long   34
        .long   2
        .long   9
        .long   0
        # Switch back to the code section for whatever follows.
        .text
So you see that data_items goes into the data section (because of the .data directive).
BTW, GCC usually emits an assembler file, which the as assembler then transforms into an ELF object file main.o containing the object code. That object file contains relocation directives and several sections, which are later processed by the ld linker.