gcc turning on -O3 causes bug on 128 bit integer [duplicate] - c

This question already has answers here:
What is the strict aliasing rule?
(11 answers)
Why does unaligned access to mmap'ed memory sometimes segfault on AMD64?
(1 answer)
Closed 2 years ago.
Two versions of a function that multiplies a unsigned 256 bit integer by constant 977. (for cryptography application) Note __int128_t type is used in the 2nd function.
#include <stdint.h>
#include <stdio.h>
// z,z_carry = uint256(y) * 977
static inline void multiply_977(uint32_t *y, uint32_t *z) {
const uint32_t x = 977;
uint32_t high=0;
uint64_t prod = x*(uint64_t)(y[0]); z[0] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[1])); z[1] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[2])); z[2] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[3])); z[3] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[4])); z[4] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[5])); z[5] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[6])); z[6] = prod;
high=prod>>32; prod = high + (x*(uint64_t)(y[7])); z[7] = prod;
high=prod>>32; z[8] = (uint32_t)high; z[9] = 0;
}
static inline void multiply_977_2(uint32_t *y_, uint32_t *z_) {
const uint32_t x = 977;
uint64_t *y = (uint64_t*)y_;
uint64_t *z = (uint64_t*)z_;
uint64_t high=0;
__uint128_t prod = x * (__uint128_t)(y[0]);
z[0] = prod; high=prod>>64; prod = high + (x * (__uint128_t)(y[1]));
z[1] = prod; high=prod>>64; prod = high + (x * (__uint128_t)(y[2]));
z[2] = prod; high=prod>>64; prod = high + (x * (__uint128_t)(y[3]));
z[3] = prod; high=prod>>64;
z[4] = high&0x00000000ffffffff;
// z_[8] = (uint64_t)high; z_[9] = 0;
}
int main(int argc, char** argv) {
uint32_t a[10] = {0};
uint32_t b[10] = {0};
a[0] = 1;
b[0] = 1;
for (int i=0; i<100; ++i) {
multiply_977(a, a);
multiply_977_2(b, b);
}
for (int i=0; i<8; ++i) { printf("%08x ", a[i]); }
printf("\n");
for (int i=0; i<8; ++i) { printf("%08x ", b[i]); }
printf("\n");
return 0;
}
output with and without -O3, results are expected to be identical:
khaotik#KKST2:~/tmp$ gcc a.c; ./a.out
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
khaotik#KKST2:~/tmp$ gcc -O3 a.c; ./a.out
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000
With -O3, gcc generates SSE code:
00000000000010a0 <main>:
10a0: f3 0f 1e fa endbr64
10a4: 41 54 push %r12
10a6: 66 0f ef c0 pxor %xmm0,%xmm0
10aa: b9 64 00 00 00 mov $0x64,%ecx
10af: 45 31 c9 xor %r9d,%r9d
10b2: 55 push %rbp
10b3: 31 f6 xor %esi,%esi
10b5: 31 ed xor %ebp,%ebp
10b7: 45 31 db xor %r11d,%r11d
10ba: 53 push %rbx
10bb: 45 31 c0 xor %r8d,%r8d
10be: 31 db xor %ebx,%ebx
10c0: 45 31 d2 xor %r10d,%r10d
10c3: bf 01 00 00 00 mov $0x1,%edi
10c8: 48 83 ec 60 sub $0x60,%rsp
10cc: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
10d3: 00 00
10d5: 48 89 44 24 58 mov %rax,0x58(%rsp)
10da: 31 c0 xor %eax,%eax
10dc: 0f 29 44 24 30 movaps %xmm0,0x30(%rsp)
10e1: c7 44 24 24 00 00 00 movl $0x0,0x24(%rsp)
10e8: 00
10e9: 48 c7 44 24 50 00 00 movq $0x0,0x50(%rsp)
10f0: 00 00
10f2: c7 44 24 30 01 00 00 movl $0x1,0x30(%rsp)
10f9: 00
10fa: 0f 29 44 24 40 movaps %xmm0,0x40(%rsp)
10ff: 90 nop
1100: 89 f8 mov %edi,%eax
1102: 4d 69 c9 d1 03 00 00 imul $0x3d1,%r9,%r9
1109: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1110: 89 c7 mov %eax,%edi
1112: 48 c1 e8 20 shr $0x20,%rax
1116: 48 89 c2 mov %rax,%rdx
1119: 44 89 d0 mov %r10d,%eax
111c: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1123: 48 01 d0 add %rdx,%rax
1126: 41 89 c2 mov %eax,%r10d
1129: 48 c1 e8 20 shr $0x20,%rax
112d: 48 89 c2 mov %rax,%rdx
1130: 44 89 c0 mov %r8d,%eax
1133: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
113a: 48 01 d0 add %rdx,%rax
113d: 41 89 c0 mov %eax,%r8d
1140: 48 c1 e8 20 shr $0x20,%rax
1144: 48 89 c2 mov %rax,%rdx
1147: 44 89 d8 mov %r11d,%eax
114a: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1151: 48 01 d0 add %rdx,%rax
1154: 41 89 c3 mov %eax,%r11d
1157: 48 c1 e8 20 shr $0x20,%rax
115b: 48 89 c2 mov %rax,%rdx
115e: 89 f0 mov %esi,%eax
1160: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1167: 48 01 d0 add %rdx,%rax
116a: 89 c6 mov %eax,%esi
116c: 48 c1 e8 20 shr $0x20,%rax
1170: 48 89 c2 mov %rax,%rdx
1173: 89 e8 mov %ebp,%eax
1175: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
117c: 48 01 d0 add %rdx,%rax
117f: 89 c5 mov %eax,%ebp
1181: 48 c1 e8 20 shr $0x20,%rax
1185: 48 89 c2 mov %rax,%rdx
1188: 89 d8 mov %ebx,%eax
118a: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1191: 48 01 d0 add %rdx,%rax
1194: 89 c3 mov %eax,%ebx
1196: 48 c1 e8 20 shr $0x20,%rax
119a: 4c 01 c8 add %r9,%rax
119d: 41 89 c1 mov %eax,%r9d
11a0: 83 e9 01 sub $0x1,%ecx
11a3: 0f 85 57 ff ff ff jne 1100 <main+0x60>
11a9: 66 41 0f 6e c8 movd %r8d,%xmm1
11ae: 66 41 0f 6e d3 movd %r11d,%xmm2
11b3: 66 0f 6e c7 movd %edi,%xmm0
11b7: 48 c1 e8 20 shr $0x20,%rax
11bb: 66 41 0f 6e da movd %r10d,%xmm3
11c0: 66 0f 62 ca punpckldq %xmm2,%xmm1
11c4: 66 0f 6e ed movd %ebp,%xmm5
11c8: 89 44 24 20 mov %eax,0x20(%rsp)
11cc: 66 0f 62 c3 punpckldq %xmm3,%xmm0
11d0: 66 41 0f 6e e1 movd %r9d,%xmm4
11d5: 4c 8d 64 24 20 lea 0x20(%rsp),%r12
11da: 66 0f 6c c1 punpcklqdq %xmm1,%xmm0
11de: 66 0f 6e cb movd %ebx,%xmm1
11e2: 48 8d 2d 1b 0e 00 00 lea 0xe1b(%rip),%rbp # 2004 <_IO_stdin_used+0x4>
11e9: 48 89 e3 mov %rsp,%rbx
11ec: 0f 29 04 24 movaps %xmm0,(%rsp)
11f0: 66 0f 6e c6 movd %esi,%xmm0
11f4: 66 0f 62 cc punpckldq %xmm4,%xmm1
11f8: 66 0f 62 c5 punpckldq %xmm5,%xmm0
11fc: 66 0f 6c c1 punpcklqdq %xmm1,%xmm0
1200: 0f 29 44 24 10 movaps %xmm0,0x10(%rsp)
1205: 0f 1f 00 nopl (%rax)
1208: 8b 13 mov (%rbx),%edx
120a: 48 89 ee mov %rbp,%rsi
120d: bf 01 00 00 00 mov $0x1,%edi
1212: 31 c0 xor %eax,%eax
1214: 48 83 c3 04 add $0x4,%rbx
1218: e8 73 fe ff ff callq 1090 <__printf_chk#plt>
121d: 49 39 dc cmp %rbx,%r12
1220: 75 e6 jne 1208 <main+0x168>
1222: bf 0a 00 00 00 mov $0xa,%edi
1227: 48 8d 5c 24 30 lea 0x30(%rsp),%rbx
122c: 4c 8d 64 24 50 lea 0x50(%rsp),%r12
1231: e8 3a fe ff ff callq 1070 <putchar#plt>
1236: 48 8d 2d c7 0d 00 00 lea 0xdc7(%rip),%rbp # 2004 <_IO_stdin_used+0x4>
123d: 0f 1f 00 nopl (%rax)
1240: 8b 13 mov (%rbx),%edx
1242: 48 89 ee mov %rbp,%rsi
1245: bf 01 00 00 00 mov $0x1,%edi
124a: 31 c0 xor %eax,%eax
124c: 48 83 c3 04 add $0x4,%rbx
1250: e8 3b fe ff ff callq 1090 <__printf_chk#plt>
1255: 49 39 dc cmp %rbx,%r12
1258: 75 e6 jne 1240 <main+0x1a0>
125a: bf 0a 00 00 00 mov $0xa,%edi
125f: e8 0c fe ff ff callq 1070 <putchar#plt>
1264: 48 8b 44 24 58 mov 0x58(%rsp),%rax
1269: 64 48 33 04 25 28 00 xor %fs:0x28,%rax
1270: 00 00
1272: 75 0b jne 127f <main+0x1df>
1274: 48 83 c4 60 add $0x60,%rsp
1278: 31 c0 xor %eax,%eax
127a: 5b pop %rbx
127b: 5d pop %rbp
127c: 41 5c pop %r12
127e: c3 retq
127f: e8 fc fd ff ff callq 1080 <__stack_chk_fail#plt>
1284: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
128b: 00 00 00
128e: 66 90 xchg %ax,%ax
However I'm not asm-savvy enough to figure out the problem.
compiler version and cpu flags:
khaotik#KKST2:~/tmp$ gcc --version
gcc (Ubuntu 9.3.0-10ubuntu2) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
khaotik#KKST2:~/tmp$ lscpu | grep Flags
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault invpcid_single pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp md_clear flush_l1d

Related

Parse number of bytes reserved for local variables on the stack from GNU objdump output?

Consider the code snippet below.
The entry point of the program is main as defined in C-source code. Now, normally a function starts by decreasing %rsp to reserve space for local variables. But here, the GCC compiler reserves this space in some of the added (initial) functions.
My question is, where do I look for the number of bytes of reserved variables in these GCC-specific initialization functions? In this case, the number of reserved bytes is 0x08.
Also, in what order are these initial functions called?
00000000004003c0 <_start>:
4003c0: 31 ed xor ebp,ebp
4003c2: 49 89 d1 mov r9,rdx
4003c5: 5e pop rsi
4003c6: 48 89 e2 mov rdx,rsp
4003c9: 48 83 e4 f0 and rsp,0xfffffffffffffff0
4003cd: 50 push rax
4003ce: 54 push rsp
4003cf: 49 c7 c0 a0 05 40 00 mov r8,0x4005a0
4003d6: 48 c7 c1 30 05 40 00 mov rcx,0x400530
4003dd: 48 c7 c7 c0 04 40 00 mov rdi,0x4004c0
4003e4: e8 b7 ff ff ff call 4003a0 <__libc_start_main#plt>
4003e9: f4 hlt
4003ea: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
00000000004003f0 <deregister_tm_clones>:
4003f0: b8 37 10 60 00 mov eax,0x601037
4003f5: 55 push rbp
4003f6: 48 2d 30 10 60 00 sub rax,0x601030
4003fc: 48 83 f8 0e cmp rax,0xe
400400: 48 89 e5 mov rbp,rsp
400403: 76 1b jbe 400420 <deregister_tm_clones+0x30>
400405: b8 00 00 00 00 mov eax,0x0
40040a: 48 85 c0 test rax,rax
40040d: 74 11 je 400420 <deregister_tm_clones+0x30>
40040f: 5d pop rbp
400410: bf 30 10 60 00 mov edi,0x601030
400415: ff e0 jmp rax
400417: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0]
40041e: 00 00
400420: 5d pop rbp
400421: c3 ret
400422: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
400426: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40042d: 00 00 00
0000000000400430 <register_tm_clones>:
400430: be 30 10 60 00 mov esi,0x601030
400435: 55 push rbp
400436: 48 81 ee 30 10 60 00 sub rsi,0x601030
40043d: 48 c1 fe 03 sar rsi,0x3
400441: 48 89 e5 mov rbp,rsp
400444: 48 89 f0 mov rax,rsi
400447: 48 c1 e8 3f shr rax,0x3f
40044b: 48 01 c6 add rsi,rax
40044e: 48 d1 fe sar rsi,1
400451: 74 15 je 400468 <register_tm_clones+0x38>
400453: b8 00 00 00 00 mov eax,0x0
400458: 48 85 c0 test rax,rax
40045b: 74 0b je 400468 <register_tm_clones+0x38>
40045d: 5d pop rbp
40045e: bf 30 10 60 00 mov edi,0x601030
400463: ff e0 jmp rax
400465: 0f 1f 00 nop DWORD PTR [rax]
400468: 5d pop rbp
400469: c3 ret
40046a: 66 0f 1f 44 00 00 nop WORD PTR [rax+rax*1+0x0]
0000000000400470 <__do_global_dtors_aux>:
400470: 80 3d b9 0b 20 00 00 cmp BYTE PTR [rip+0x200bb9],0x0 # 601030 <__TMC_END__>
400477: 75 11 jne 40048a <__do_global_dtors_aux+0x1a>
400479: 55 push rbp
40047a: 48 89 e5 mov rbp,rsp
40047d: e8 6e ff ff ff call 4003f0 <deregister_tm_clones>
400482: 5d pop rbp
400483: c6 05 a6 0b 20 00 01 mov BYTE PTR [rip+0x200ba6],0x1 # 601030 <__TMC_END__>
40048a: f3 c3 repz ret
40048c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
0000000000400490 <frame_dummy>:
400490: bf 20 0e 60 00 mov edi,0x600e20
400495: 48 83 3f 00 cmp QWORD PTR [rdi],0x0
400499: 75 05 jne 4004a0 <frame_dummy+0x10>
40049b: eb 93 jmp 400430 <register_tm_clones>
40049d: 0f 1f 00 nop DWORD PTR [rax]
4004a0: b8 00 00 00 00 mov eax,0x0
4004a5: 48 85 c0 test rax,rax
4004a8: 74 f1 je 40049b <frame_dummy+0xb>
4004aa: 55 push rbp
4004ab: 48 89 e5 mov rbp,rsp
4004ae: ff d0 call rax
4004b0: 5d pop rbp
4004b1: e9 7a ff ff ff jmp 400430 <register_tm_clones>
4004b6: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
4004bd: 00 00 00
00000000004004c0 <main>:
4004c0: 55 push rbp
4004c1: 48 89 e5 mov rbp,rsp
4004c4: c7 45 f8 00 00 00 00 mov DWORD PTR [rbp-0x8],0x0
4004cb: c7 45 fc 01 00 00 00 mov DWORD PTR [rbp-0x4],0x1
4004d2: eb 46 jmp 40051a <.cend>
4004d4: 66 66 66 2e 0f 1f 84 data16 data16 nop WORD PTR cs:[rax+rax*1+0x0]
4004db: 00 00 00 00 00
4004e0: ff 05 4e 0b 20 00 inc DWORD PTR [rip+0x200b4e] # 601034 <sum>
4004e6: 50 push rax
4004e7: 53 push rbx
4004e8: 56 push rsi
4004e9: 48 31 c0 xor rax,rax
4004ec: 48 c7 c6 14 05 40 00 mov rsi,0x400514
00000000004004f3 <.cloop>:
4004f3: 48 0f b6 1e movzx rbx,BYTE PTR [rsi]
4004f7: 48 31 d8 xor rax,rbx
4004fa: 48 ff c6 inc rsi
4004fd: 48 81 fe 1a 05 40 00 cmp rsi,0x40051a
400504: 75 ed jne 4004f3 <.cloop>
400506: 48 83 f8 00 cmp rax,0x0
40050a: 74 05 je 400511 <.restore>
40050c: 48 31 c0 xor rax,rax
40050f: ff d0 call rax
0000000000400511 <.restore>:
400511: 5e pop rsi
400512: 5b pop rbx
400513: 58 pop rax
0000000000400514 <.cstart>:
400514: eb 01 jmp 400517 <.end>
0000000000400516 <.cslot>:
400516: ac lods al,BYTE PTR ds:[rsi]
0000000000400517 <.end>:
400517: ff 45 fc inc DWORD PTR [rbp-0x4]
000000000040051a <.cend>:
40051a: 83 7d fc 1e cmp DWORD PTR [rbp-0x4],0x1e
40051e: 7e c0 jle 4004e0 <main+0x20>
400520: 8b 05 0e 0b 20 00 mov eax,DWORD PTR [rip+0x200b0e] # 601034 <sum>
400526: 5d pop rbp
400527: c3 ret
400528: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40052f: 00
0000000000400530 <__libc_csu_init>:
400530: 41 57 push r15
400532: 41 56 push r14
400534: 41 89 ff mov r15d,edi
400537: 41 55 push r13
400539: 41 54 push r12
40053b: 4c 8d 25 ce 08 20 00 lea r12,[rip+0x2008ce] # 600e10 <__frame_dummy_init_array_entry>
400542: 55 push rbp
400543: 48 8d 2d ce 08 20 00 lea rbp,[rip+0x2008ce] # 600e18 <__init_array_end>
40054a: 53 push rbx
40054b: 49 89 f6 mov r14,rsi
40054e: 49 89 d5 mov r13,rdx
400551: 4c 29 e5 sub rbp,r12
400554: 48 83 ec 08 sub rsp,0x8
400558: 48 c1 fd 03 sar rbp,0x3
40055c: e8 0f fe ff ff call 400370 <_init>
400561: 48 85 ed test rbp,rbp
400564: 74 20 je 400586 <__libc_csu_init+0x56>
400566: 31 db xor ebx,ebx
400568: 0f 1f 84 00 00 00 00 nop DWORD PTR [rax+rax*1+0x0]
40056f: 00
400570: 4c 89 ea mov rdx,r13
400573: 4c 89 f6 mov rsi,r14
400576: 44 89 ff mov edi,r15d
400579: 41 ff 14 dc call QWORD PTR [r12+rbx*8]
40057d: 48 83 c3 01 add rbx,0x1
400581: 48 39 eb cmp rbx,rbp
400584: 75 ea jne 400570 <__libc_csu_init+0x40>
400586: 48 83 c4 08 add rsp,0x8
40058a: 5b pop rbx
40058b: 5d pop rbp
40058c: 41 5c pop r12
40058e: 41 5d pop r13
400590: 41 5e pop r14
400592: 41 5f pop r15
400594: c3 ret
400595: 90 nop
400596: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40059d: 00 00 00
00000000004005a0 <__libc_csu_fini>:
4005a0: f3 c3 repz ret
Disassembly of section .fini:
00000000004005a4 <_fini>:
4005a4: 48 83 ec 08 sub rsp,0x8
4005a8: 48 83 c4 08 add rsp,0x8
4005ac: c3

Deciphering x86 assembly function

I am currently working on phase 2 of the binary bomb assignment. I'm having trouble deciphering exactly what a certain function does when called. I've been stuck on it for days.
The function is:
0000000000400f2a <func2a>:
400f2a: 85 ff test %edi,%edi
400f2c: 74 1d je 400f4b <func2a+0x21>
400f2e: b9 cd cc cc cc mov $0xcccccccd,%ecx
400f33: 89 f8 mov %edi,%eax
400f35: f7 e1 mul %ecx
400f37: c1 ea 03 shr $0x3,%edx
400f3a: 8d 04 92 lea (%rdx,%rdx,4),%eax
400f3d: 01 c0 add %eax,%eax
400f3f: 29 c7 sub %eax,%edi
400f41: 83 04 be 01 addl $0x1,(%rsi,%rdi,4)
400f45: 89 d7 mov %edx,%edi
400f47: 85 d2 test %edx,%edx
400f49: 75 e8 jne 400f33 <func2a+0x9>
400f4b: f3 c3 repz retq
It gets called in the larger function "phase_2":
0000000000400f4d <phase_2>:
400f4d: 53 push %rbx
400f4e: 48 83 ec 60 sub $0x60,%rsp
400f52: 48 c7 44 24 30 00 00 movq $0x0,0x30(%rsp)
400f59: 00 00
400f5b: 48 c7 44 24 38 00 00 movq $0x0,0x38(%rsp)
400f62: 00 00
400f64: 48 c7 44 24 40 00 00 movq $0x0,0x40(%rsp)
400f6b: 00 00
400f6d: 48 c7 44 24 48 00 00 movq $0x0,0x48(%rsp)
400f74: 00 00
400f76: 48 c7 44 24 50 00 00 movq $0x0,0x50(%rsp)
400f7d: 00 00
400f7f: 48 c7 04 24 00 00 00 movq $0x0,(%rsp)
400f86: 00
400f87: 48 c7 44 24 08 00 00 movq $0x0,0x8(%rsp)
400f8e: 00 00
400f90: 48 c7 44 24 10 00 00 movq $0x0,0x10(%rsp)
400f97: 00 00
400f99: 48 c7 44 24 18 00 00 movq $0x0,0x18(%rsp)
400fa0: 00 00
400fa2: 48 c7 44 24 20 00 00 movq $0x0,0x20(%rsp)
400fa9: 00 00
400fab: 48 8d 4c 24 58 lea 0x58(%rsp),%rcx
400fb0: 48 8d 54 24 5c lea 0x5c(%rsp),%rdx
400fb5: be 9e 26 40 00 mov $0x40269e,%esi
400fba: b8 00 00 00 00 mov $0x0,%eax
400fbf: e8 6c fc ff ff callq 400c30 <__isoc99_sscanf#plt>
400fc4: 83 f8 02 cmp $0x2,%eax
400fc7: 74 05 je 400fce <phase_2+0x81>
400fc9: e8 c1 06 00 00 callq 40168f <explode_bomb>
400fce: 83 7c 24 5c 64 cmpl $0x64,0x5c(%rsp)
400fd3: 76 07 jbe 400fdc <phase_2+0x8f>
400fd5: 83 7c 24 58 64 cmpl $0x64,0x58(%rsp)
400fda: 77 05 ja 400fe1 <phase_2+0x94>
400fdc: e8 ae 06 00 00 callq 40168f <explode_bomb>
400fe1: 48 8d 74 24 30 lea 0x30(%rsp),%rsi
400fe6: 8b 7c 24 5c mov 0x5c(%rsp),%edi
400fea: e8 3b ff ff ff callq 400f2a <func2a>
400fef: 48 89 e6 mov %rsp,%rsi
400ff2: 8b 7c 24 58 mov 0x58(%rsp),%edi
400ff6: e8 2f ff ff ff callq 400f2a <func2a>
400ffb: bb 00 00 00 00 mov $0x0,%ebx
401000: 8b 04 1c mov (%rsp,%rbx,1),%eax
401003: 39 44 1c 30 cmp %eax,0x30(%rsp,%rbx,1)
401007: 74 05 je 40100e <phase_2+0xc1>
401009: e8 81 06 00 00 callq 40168f <explode_bomb>
40100e: 48 83 c3 04 add $0x4,%rbx
401012: 48 83 fb 28 cmp $0x28,%rbx
401016: 75 e8 jne 401000 <phase_2+0xb3>
401018: 48 83 c4 60 add $0x60,%rsp
40101c: 5b pop %rbx
40101d: c3 retq
I completely understand what phase_2 is doing, I just don't understand what func2a is doing and how it affects the values at 0x30(%rsp) and so on. Because of this I always get to the comparison statement at 0x401003, and the bomb eventually explodes there.
My problem is I don't understand how the input (phase solution) is affecting the values at 0x30(%rsp) via func2a.
400f2a: 85 ff test %edi,%edi
400f2c: 74 1d je 400f4b <func2a+0x21>
This is just an early exit for when edi is zero (je is the same as jz).
400f2e: b9 cd cc cc cc mov $0xcccccccd,%ecx
400f33: 89 f8 mov %edi,%eax
400f35: f7 e1 mul %ecx
400f37: c1 ea 03 shr $0x3,%edx
This is a classic optimization trick; it is the integer arithmetic equivalent of dividing by multiplying by the inverse (see here for details); in practice, here it's the same as saying edx = edi / 10;
400f3a: 8d 04 92 lea (%rdx,%rdx,4),%eax
400f3d: 01 c0 add %eax,%eax
Here it is exploiting lea to perform arithmetic (and it's way clearer in Intel syntax, where it is lea eax,[rdx+rdx*4] => eax = edx*5), then sums the result with itself. It all boils down to eax = edx*10.
400f3f: 29 c7 sub %eax,%edi
Then, subtract it back to edi.
So, all in all this is a complicated (but fast) way to compute the last decimal digit of edi; what we have until now is something like:
void func2a(unsigned edi) {
if(edi==0) return;
label1:
edx=edi/10;
edi%=10;
// ...
}
(label1: is there because 400f33 is a jump target later)
Going on:
400f41: 83 04 be 01 addl $0x1,(%rsi,%rdi,4)
Again, this is way clearer to me in Intel syntax - add dword [rsi+rdi*4],byte +0x1. It is a regular increment into an array of 32-bit int (rdi is multiplied by 4); so, we can imagine that rsi points to an array of integers, indexed with the just-calculated last digit of edi.
void func2a(unsigned edi, int rsi[]) {
if(edi==0) return;
label1:
edx=edi/10;
edi%=10;
rsi[edi]++;
}
Then:
400f45: 89 d7 mov %edx,%edi
400f47: 85 d2 test %edx,%edx
400f49: 75 e8 jne 400f33 <func2a+0x9>
Move the result of the division we calculated above to edi, and loop if it's different from zero.
400f4b: f3 c3 repz retq
Return (using an unusual encoding of the instruction that is optimal for certain AMD processors).
So, by rewriting the jumps with a while loop and giving some meaningful names...
// number is edi, digits_count is rsi, as per regular
// x64 SystemV calling convention
void count_digits(unsigned number, int digits_count[]) {
while(number) {
digits_count[number%10]++;
number/=10;
}
}
I.e., this is a function that, given an integer, counts the occurrences of the single decimal digits, by incrementing the corresponding buckets in the digits_count array.
Fun fact: if we give the C code above to gcc (almost any recent version at -O1) we obtain back exactly the assembly you provided.

Manual decompilation of asm snippet

I've been trying to decompile the following asm snippet(that's all I have):
55 push %rbp
48 89 e5 mov %rsp,%rbp
48 81 ec d0 00 00 00 sub $0xd0,%rsp
64 48 8b 04 25 28 00 mov %fs:0x28,%rax
00 00
48 89 45 f8 mov %rax,-0x8(%rbp)
31 c0 xor %eax,%eax
48 c7 85 30 ff ff ff movq $0x0,-0xd0(%rbp)
00 00 00 00
48 8d b5 38 ff ff ff lea -0xc8(%rbp),%rsi
b8 00 00 00 00 mov $0x0,%eax
ba 18 00 00 00 mov $0x18,%edx
48 89 f7 mov %rsi,%rdi
48 89 d1 mov %rdx,%rcx
f3 48 ab rep stos %rax,%es:(%rdi)
48 8b 15 19 06 20 00 mov 0x200619(%rip),%rdx
48 8d 85 30 ff ff ff lea -0xd0(%rbp),%rax
be ce 0f 40 00 mov $0x400fce,%esi
48 89 c7 mov %rax,%rdi
b8 00 00 00 00 mov $0x0,%eax
e8 4e fc ff ff callq 4008a0 <sprintf#plt>
Here is my attempt:
char buf[192] = {0};
sprintf(buf, "hello %s", name);
I've compiled this with gcc 4.8.5, and it gave me:
55 push %rbp
48 89 e5 mov %rsp,%rbp
48 81 ec d0 00 00 00 sub $0xd0,%rsp
64 48 8b 04 25 28 00 mov %fs:0x28,%rax
00 00
48 89 45 f8 mov %rax,-0x8(%rbp)
31 c0 xor %eax,%eax
48 8d b5 30 ff ff ff lea -0xd0(%rbp),%rsi
b8 00 00 00 00 mov $0x0,%eax
ba 18 00 00 00 mov $0x18,%edx
48 89 f7 mov %rsi,%rdi
48 89 d1 mov %rdx,%rcx
f3 48 ab rep stos %rax,%es:(%rdi)
48 8b 15 14 14 20 00 mov 0x201414(%rip),%rdx
48 8d 85 30 ff ff ff lea -0xd0(%rbp),%rax
be 2e 10 40 00 mov $0x40102e,%esi
48 89 c7 mov %rax,%rdi
b8 00 00 00 00 mov $0x0,%eax
e8 cb fb ff ff callq 4008a0 <sprintf#plt>
I'm struggling to figure out why this exists:
movq $0x0,-0xd0(%rbp)
and also the subsequent usage of -0xd0(%rbp) as a pointer for the argument to sprintf. I'm puzzled because the rep stos begin at -0xc8(%rbp) and not -0xd0(%rbp).
This is probably compiler specific, but still I'm curious what could possibly be the original code that produced that asm.
I imagine something like:
char buf[192] = {0, 0, 0, 0, 0, 0, 0, 0};
sprintf(buf + 8, "hello %s", name);
... would give you that output.
The movq instruction you refer to stores 0 (an 8-byte quantity) at the beginning of an array. The -0xc8(%rbp) comes from copying a string to an offset within the array.

Jump Table not found in Assembly code

I'm learning Assembly. I wrote the below c program containing switch case, created the object file(gcc -o filename filename.c), then took the object dump. But I didn't find the Labels and jump tables in the object dump.
Can anybody tell me why the jump table is not getting generated ? Like the ones mention here
Link
Code
C file
int main() {
int i = 0;
int n = 9, z = 99 , p = 999;
switch( i )
{
case -1:
n++;
printf("value n=%d",n);
break;
case 0 :
z++;
printf("value z=%d",z);
break;
case 1 :
p++;
printf("value p=%d",p);
break;
case 2 :
p++;
printf("value p=%d",p);
break;
case 3 :
p++;
printf("value p=%d",p);
break;
case 4 :
p++;
printf("value p=%d",p);
break;
case 5 :
p++;
printf("value p=%d",p);
break;
}
printf("Values n=%d z=%d p=%d \n",n,z,p);
return 0;
}
Below is the main section
0804841d <main>:
804841d: 55 push %ebp
804841e: 89 e5 mov %esp,%ebp
8048420: 83 e4 f0 and $0xfffffff0,%esp
8048423: 83 ec 20 sub $0x20,%esp
8048426: c7 44 24 1c 00 00 00 movl $0x0,0x1c(%esp)
804842d: 00
804842e: c7 44 24 10 09 00 00 movl $0x9,0x10(%esp)
8048435: 00
8048436: c7 44 24 14 63 00 00 movl $0x63,0x14(%esp)
804843d: 00
804843e: c7 44 24 18 e7 03 00 movl $0x3e7,0x18(%esp)
8048445: 00
8048446: 8b 44 24 1c mov 0x1c(%esp),%eax
804844a: 83 c0 01 add $0x1,%eax
804844d: 83 f8 06 cmp $0x6,%eax
8048450: 0f 87 cb 00 00 00 ja 8048521 <main+0x104>
8048456: 8b 04 85 1c 86 04 08 mov 0x804861c(,%eax,4),%eax
804845d: ff e0 jmp *%eax
804845f: 83 44 24 10 01 addl $0x1,0x10(%esp)
8048464: 8b 44 24 10 mov 0x10(%esp),%eax
8048468: 89 44 24 04 mov %eax,0x4(%esp)
804846c: c7 04 24 e0 85 04 08 movl $0x80485e0,(%esp)
8048473: e8 78 fe ff ff call 80482f0 <printf#plt>
8048478: e9 a4 00 00 00 jmp 8048521 <main+0x104>
804847d: 83 44 24 14 01 addl $0x1,0x14(%esp)
8048482: 8b 44 24 14 mov 0x14(%esp),%eax
8048486: 89 44 24 04 mov %eax,0x4(%esp)
804848a: c7 04 24 eb 85 04 08 movl $0x80485eb,(%esp)
8048491: e8 5a fe ff ff call 80482f0 <printf#plt>
8048496: e9 86 00 00 00 jmp 8048521 <main+0x104>
804849b: 83 44 24 18 01 addl $0x1,0x18(%esp)
80484a0: 8b 44 24 18 mov 0x18(%esp),%eax
80484a4: 89 44 24 04 mov %eax,0x4(%esp)
80484a8: c7 04 24 f6 85 04 08 movl $0x80485f6,(%esp)
80484af: e8 3c fe ff ff call 80482f0 <printf#plt>
80484b4: eb 6b jmp 8048521 <main+0x104>
80484b6: 83 44 24 18 01 addl $0x1,0x18(%esp)
80484bb: 8b 44 24 18 mov 0x18(%esp),%eax
80484bf: 89 44 24 04 mov %eax,0x4(%esp)
80484c3: c7 04 24 f6 85 04 08 movl $0x80485f6,(%esp)
80484ca: e8 21 fe ff ff call 80482f0 <printf#plt>
80484cf: eb 50 jmp 8048521 <main+0x104>
80484d1: 83 44 24 18 01 addl $0x1,0x18(%esp)
80484d6: 8b 44 24 18 mov 0x18(%esp),%eax
80484da: 89 44 24 04 mov %eax,0x4(%esp)
80484de: c7 04 24 f6 85 04 08 movl $0x80485f6,(%esp)
80484e5: e8 06 fe ff ff call 80482f0 <printf#plt>
80484ea: eb 35 jmp 8048521 <main+0x104>
80484ec: 83 44 24 18 01 addl $0x1,0x18(%esp)
80484f1: 8b 44 24 18 mov 0x18(%esp),%eax
80484f5: 89 44 24 04 mov %eax,0x4(%esp)
80484f9: c7 04 24 f6 85 04 08 movl $0x80485f6,(%esp)
8048500: e8 eb fd ff ff call 80482f0 <printf#plt>
8048505: eb 1a jmp 8048521 <main+0x104>
8048507: 83 44 24 18 01 addl $0x1,0x18(%esp)
804850c: 8b 44 24 18 mov 0x18(%esp),%eax
8048510: 89 44 24 04 mov %eax,0x4(%esp)
8048514: c7 04 24 f6 85 04 08 movl $0x80485f6,(%esp)
804851b: e8 d0 fd ff ff call 80482f0 <printf#plt>
8048520: 90 nop
8048521: 8b 44 24 18 mov 0x18(%esp),%eax
8048525: 89 44 24 0c mov %eax,0xc(%esp)
8048529: 8b 44 24 14 mov 0x14(%esp),%eax
804852d: 89 44 24 08 mov %eax,0x8(%esp)
8048531: 8b 44 24 10 mov 0x10(%esp),%eax
8048535: 89 44 24 04 mov %eax,0x4(%esp)
8048539: c7 04 24 01 86 04 08 movl $0x8048601,(%esp)
8048540: e8 ab fd ff ff call 80482f0 <printf#plt>
8048545: b8 00 00 00 00 mov $0x0,%eax
804854a: c9 leave
804854b: c3 ret
804854c: 66 90 xchg %ax,%ax
804854e: 66 90 xchg %ax,%ax
Below is the .rodata section
Disassembly of section .rodata:
080485d8 <_fp_hw>:
80485d8: 03 00 add (%eax),%eax
...
Can anybody let me know why this is behaving like this?
Thanks in Advance
Your jump table is located at address 0x804861c. If you dump this address, I'm pretty sure, you'll find the values 0x804845f, 0x804847d, 0x804849b, etc. since these values correspond to the addresses of the branches of the switch statement.
What happens is that first it is ensured that the value of i (0x1c(%esp))is between 0 and 6 (and jump if above, ja, to last printf) and if it is between, uses its value multiplied by 4 (sizeof addresses on your architecture) as an offset into the jump table (0x804861c(,%eax,4),%eax).
I'm not sure what you're looking for exactly, or what you're trying to achieve, but as #Jens Gustedt pointed out, you should use the -S switch if you want to observe the assembly generated.
Additionally, beware that your code can easily be optimised by the compiler, i.e. as soon as you use the -O switch your assembly will probably shrink down to the last printf and the return statement, since the whole code execution can be predicted and useless parts can be omitted.

can some help understand this objdump of c [closed]

It's difficult to tell what is being asked here. This question is ambiguous, vague, incomplete, overly broad, or rhetorical and cannot be reasonably answered in its current form. For help clarifying this question so that it can be reopened, visit the help center.
Closed 10 years ago.
08048544 <compare_password>:
8048544: 55 push %ebp
8048545: 89 e5 mov %esp,%ebp
8048547: 83 ec 38 sub $0x38,%esp
804854a: 8b 45 0c mov 0xc(%ebp),%eax
804854d: 89 45 d4 mov %eax,-0x2c(%ebp)
8048550: 65 a1 14 00 00 00 mov %gs:0x14,%eax
8048556: 89 45 f4 mov %eax,-0xc(%ebp)
8048559: 31 c0 xor %eax,%eax
804855b: c7 45 e4 00 00 00 00 movl $0x0,-0x1c(%ebp)
8048562: c7 45 e0 00 00 00 00 movl $0x0,-0x20(%ebp)
8048569: eb 22 jmp 804858d <compare_password+0x49>
804856b: 8b 45 e0 mov -0x20(%ebp),%eax
804856e: 03 45 d4 add -0x2c(%ebp),%eax
8048571: 0f b6 10 movzbl (%eax),%edx
8048574: 8b 45 e0 mov -0x20(%ebp),%eax
8048577: 05 44 a1 04 08 add $0x804a144,%eax
804857c: 0f b6 00 movzbl (%eax),%eax
804857f: 31 c2 xor %eax,%edx
8048581: 8d 45 ea lea -0x16(%ebp),%eax
8048584: 03 45 e0 add -0x20(%ebp),%eax
8048587: 88 10 mov %dl,(%eax)
8048589: 83 45 e0 01 addl $0x1,-0x20(%ebp)
804858d: 83 7d e0 09 cmpl $0x9,-0x20(%ebp)
8048591: 7e d8 jle 804856b <compare_password+0x27>
8048593: c7 45 e0 00 00 00 00 movl $0x0,-0x20(%ebp)
804859a: eb 2c jmp 80485c8 <compare_password+0x84>
804859c: 8b 55 08 mov 0x8(%ebp),%edx
804859f: 89 d0 mov %edx,%eax
80485a1: c1 e0 02 shl $0x2,%eax
80485a4: 01 d0 add %edx,%eax
80485a6: 01 c0 add %eax,%eax
80485a8: 03 45 e0 add -0x20(%ebp),%eax
80485ab: 05 e0 a0 04 08 add $0x804a0e0,%eax
80485b0: 0f b6 10 movzbl (%eax),%edx
80485b3: 8d 45 ea lea -0x16(%ebp),%eax
80485b6: 03 45 e0 add -0x20(%ebp),%eax
80485b9: 0f b6 00 movzbl (%eax),%eax
80485bc: 38 c2 cmp %al,%dl
80485be: 75 04 jne 80485c4 <compare_password+0x80>
80485c0: 83 45 e4 01 addl $0x1,-0x1c(%ebp)
80485c4: 83 45 e0 01 addl $0x1,-0x20(%ebp)
80485c8: 83 7d e0 09 cmpl $0x9,-0x20(%ebp)
80485cc: 7e ce jle 804859c <compare_password+0x58>
80485ce: 83 7d e4 08 cmpl $0x8,-0x1c(%ebp)
80485d2: 7e 07 jle 80485db <compare_password+0x97>
80485d4: b8 01 00 00 00 mov $0x1,%eax
80485d9: eb 05 jmp 80485e0 <compare_password+0x9c>
80485db: b8 00 00 00 00 mov $0x0,%eax
80485e0: 8b 55 f4 mov -0xc(%ebp),%edx
80485e3: 65 33 15 14 00 00 00 xor %gs:0x14,%edx
80485ea: 74 05 je 80485f1 <compare_password+0xad>
80485ec: e8 2f fe ff ff call 8048420 <__stack_chk_fail#plt>
80485f1: c9 leave
80485f2: c3 ret
080485f3 <main>:
80485f3: 55 push %ebp
80485f4: 89 e5 mov %esp,%ebp
80485f6: 83 e4 f0 and $0xfffffff0,%esp
80485f9: 83 ec 30 sub $0x30,%esp
80485fc: 65 a1 14 00 00 00 mov %gs:0x14,%eax
8048602: 89 44 24 2c mov %eax,0x2c(%esp)
8048606: 31 c0 xor %eax,%eax
8048608: c7 44 24 04 00 00 00 movl $0x0,0x4(%esp)
804860f: 00
8048610: 8d 44 24 10 lea 0x10(%esp),%eax
8048614: 89 04 24 mov %eax,(%esp)
8048617: e8 f4 fd ff ff call 8048410 <gettimeofday#plt>
804861c: 8b 54 24 10 mov 0x10(%esp),%edx
8048620: 8b 44 24 14 mov 0x14(%esp),%eax
8048624: 0f af c2 imul %edx,%eax
8048627: 89 04 24 mov %eax,(%esp)
804862a: e8 21 fe ff ff call 8048450 <srand#plt>
804862f: e8 3c fe ff ff call 8048470 <rand#plt>
8048634: 89 44 24 18 mov %eax,0x18(%esp)
8048638: 8b 4c 24 18 mov 0x18(%esp),%ecx
804863c: ba 67 66 66 66 mov $0x66666667,%edx
8048641: 89 c8 mov %ecx,%eax
8048643: f7 ea imul %edx
8048645: c1 fa 02 sar $0x2,%edx
8048648: 89 c8 mov %ecx,%eax
804864a: c1 f8 1f sar $0x1f,%eax
804864d: 29 c2 sub %eax,%edx
804864f: 89 d0 mov %edx,%eax
8048651: c1 e0 02 shl $0x2,%eax
8048654: 01 d0 add %edx,%eax
8048656: 01 c0 add %eax,%eax
8048658: 89 ca mov %ecx,%edx
804865a: 29 c2 sub %eax,%edx
804865c: 89 d0 mov %edx,%eax
804865e: 89 44 24 18 mov %eax,0x18(%esp)
8048662: 8b 54 24 18 mov 0x18(%esp),%edx
8048666: 89 d0 mov %edx,%eax
8048668: c1 e0 02 shl $0x2,%eax
804866b: 01 d0 add %edx,%eax
804866d: 01 c0 add %eax,%eax
804866f: 8d 90 60 a0 04 08 lea 0x804a060(%eax),%edx
8048675: b8 c0 87 04 08 mov $0x80487c0,%eax
804867a: 89 54 24 04 mov %edx,0x4(%esp)
804867e: 89 04 24 mov %eax,(%esp)
8048681: e8 7a fd ff ff call 8048400 <printf#plt>
8048686: b8 da 87 04 08 mov $0x80487da,%eax
804868b: 8d 54 24 22 lea 0x22(%esp),%edx
804868f: 89 54 24 04 mov %edx,0x4(%esp)
8048693: 89 04 24 mov %eax,(%esp)
8048696: e8 e5 fd ff ff call 8048480 <__isoc99_scanf#plt>
804869b: 8d 44 24 22 lea 0x22(%esp),%eax
804869f: 89 44 24 04 mov %eax,0x4(%esp)
80486a3: 8b 44 24 18 mov 0x18(%esp),%eax
80486a7: 89 04 24 mov %eax,(%esp)
80486aa: e8 95 fe ff ff call 8048544 <compare_password>
80486af: 89 44 24 1c mov %eax,0x1c(%esp)
80486b3: 83 7c 24 1c 01 cmpl $0x1,0x1c(%esp)
80486b8: 75 0e jne 80486c8 <main+0xd5>
80486ba: c7 04 24 dd 87 04 08 movl $0x80487dd,(%esp)
80486c1: e8 6a fd ff ff call 8048430 <puts#plt>
80486c6: eb 0c jmp 80486d4 <main+0xe1>
80486c8: c7 04 24 f2 87 04 08 movl $0x80487f2,(%esp)
80486cf: e8 5c fd ff ff call 8048430 <puts#plt>
80486d4: 8b 54 24 2c mov 0x2c(%esp),%edx
80486d8: 65 33 15 14 00 00 00 xor %gs:0x14,%edx
80486df: 74 05 je 80486e6 <main+0xf3>
80486e1: e8 3a fd ff ff call 8048420 <__stack_chk_fail#plt>
80486e6: c9 leave
80486e7: c3 ret
80486e8: 90 nop
80486e9: 90 nop
80486ea: 90 nop
80486eb: 90 nop
80486ec: 90 nop
80486ed: 90 nop
80486ee: 90 nop
80486ef: 90 nop
Ok, learning assembly code from scratch will take some time and effort, but there's no harm in getting the basics.
Each line of this output contains three parts:
The offset in the file where that piece of code is (in hex)
The bytes that make up that piece of code (each in hex, again)
The assembly language form of that code (basically reverse-translated from the bytes).
You can generally read the flow of the program through the last column. Instructions like JMPs will refer to other locations, which may or may not be nearby in the code. They may be presented in a labelled form like:
jmp 804858d <compare_password+0x49>
That says, jump to offset 0x804858d, so you can find that value in the first column. The label says that this is offset 0x49 after compare_password.
If you don't know what most of the instructions do, well, they mostly move, combine and compare individual words of memory and register. Even when you learn what each code does, understanding what it does in the context of this particular program can be hard. And you generally need to know the location of other important pieces of data when the program will be running to know what the effect will be.
There are lots of resources for learning computer programming at the level of debugging, assembly language and dissassembly, but I will leave it to others to refer you. If you really want to learn, a good way is to write your own simple program in C, and compile it to assembly. Then compare the C and assembly output side-by-side, figuring out how the C statements have been translated into instructions.

Resources