I am using Ghidra to reverse-engineer ARMv7 firmware.
While analyzing the code at address 0x0002843a, it looks like r0 is loaded with the data at address 0x0002881c, but Ghidra shows what looks like an `addr` instruction at address 0x0002881c.
I read the ARM®v7-M Architecture Reference Manual and could not find an `addr` instruction.
I don't understand what `addr` means; please help me.
**************************************************************
* FUNCTION *
**************************************************************
undefined FUN_0002842e()
assume LRset = 0x0
assume TMode = 0x1
undefined r0:1 <RETURN>
FUN_0002842e XREF[1]: FUN_0002706c:00027168(c)
0002842e 10 b5 push { r4, lr }
00028430 04 46 mov r4,r0
00028432 20 46 mov r0,r4
00028434 ff f7 52 fe bl FUN_000280dc undefined FUN_000280dc()
00028438 04 46 mov r4,r0
0002843a f8 48 ldr r0,[PTR_PTR_FUN_00028488+1_0002881c] = 0066a66c**
0002843c 20 60 str r0=>PTR_FUN_00028488+1_0066a66c,[r4,#0x0] = 00028489
0002843e 00 20 mov r0,#0x0
00028440 60 60 str r0,[r4,#0x4]
00028442 a0 60 str r0,[r4,#0x8]
00028444 20 61 str r0,[r4,#0x10]
00028446 e0 60 str r0,[r4,#0xc]
00028448 60 61 str r0,[r4,#0x14]
0002844a 00 21 mov r1,#0x0
0002844c 5c 20 mov r0,#0x5c
0002844e 01 55 strb r1,[r0,r4]
00028450 5d 20 mov r0,#0x5d
00028452 01 55 strb r1,[r0,r4]
00028454 5e 20 mov r0,#0x5e
00028456 01 55 strb r1,[r0,r4]
00028458 00 20 mov r0,#0x0
0002845a a0 63 str r0,[r4,#0x38]
0002845c e1 63 str r1,[r4,#0x3c]
0002845e a0 64 str r0,[r4,#0x48]
00028460 02 21 mov r1,#0x2
00028462 e1 64 str r1,[r4,#0x4c]
00028464 ee 48 ldr r0,[DAT_00028820] = 188DA314h
00028466 00 6c ldr r0,[r0,#0x40]=>DAT_188da354
00028468 00 28 cmp r0,#0x0
0002846a 03 d1 bne LAB_00028474
0002846c ec 48 ldr r0,[DAT_00028820] = 188DA314h
0002846e 44 30 add r0,#0x44
00028470 eb 49 ldr r1,[DAT_00028820] = 188DA314h
00028472 08 64 str r0,[r1,#0x40]=>DAT_188da354
LAB_00028474 XREF[1]: 0002846a(j)
00028474 eb 48 ldr r0,[DAT_00028824] = 188DA494h
00028476 80 6b ldr r0,[r0,#0x38]=>DAT_188da4cc
00028478 00 28 cmp r0,#0x0
0002847a 03 d1 bne LAB_00028484
0002847c e9 49 ldr r1,[DAT_00028824] = 188DA494h
0002847e 3c 31 add r1,#0x3c
00028480 e8 48 ldr r0,[DAT_00028824] = 188DA494h
00028482 81 63 str r1,[r0,#0x38]=>DAT_188da4cc
LAB_00028484 XREF[1]: 0002847a(j)
00028484 20 46 mov r0,r4
00028486 10 bd pop { r4, pc }
PTR_PTR_FUN_00028488+1_0002881c XREF[1]: FUN_0002842e:0002843a(R)
0002881c 6c a6 66 00 addr PTR_FUN_00028488+1_0066a66c = 00028489
PTR_FUN_00028488+1_0066a66c XREF[2]: FUN_0002842e:0002843c(*),
0002881c(*)
0066a66c 89 84 02 00 addr FUN_00028488+1
0066a670 99 84 02 00 addr FUN_00028498+1
0066a674 d5 90 02 00 addr FUN_000290d4+1
0066a678 45 d8 02 00 addr FUN_0002d844+1
0066a67c 73 da 02 00 addr LAB_0002da72+1
0066a680 75 da 02 00 addr FUN_0002da74+1
0066a684 c7 d9 02 00 addr DAT_0002d9c7 = B5h
0066a688 fb 80 02 00 addr LAB_000280fa+1
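addr is not a CPU instruction, which is why it is not in the architecture manual. It is how the disassembler displays a data word that holds an address, much like an assembler directive that initialises the next four bytes (or eight bytes in 64-bit mode) to the address of its argument. (The +1 after all your function addresses means that the target is Thumb code: bit 0 of a function pointer is set to 1 to indicate Thumb state. I confess I have never seen the strange PTR_FUN_00028488+1_0066a66c syntax; it looks like an auto-generated label for "pointer to FUN_00028488+1, stored at 0066a66c". Perhaps others can explain it.)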
These addresses are never reached at execution time, because of the pop instruction.
Related
This question already has answers here:
What is the strict aliasing rule?
(11 answers)
Why does unaligned access to mmap'ed memory sometimes segfault on AMD64?
(1 answer)
Closed 2 years ago.
Here are two versions of a function that multiplies an unsigned 256-bit integer by the constant 977 (for a cryptography application). Note that the __uint128_t type is used in the 2nd function.
#include <stdint.h>
#include <stdio.h>
/*
 * z = y * 977, where y is an unsigned 256-bit integer stored as eight 32-bit
 * words, least significant word first.
 *
 * Writes ten words to z: z[0..7] is the low 256 bits of the product, z[8] is
 * the carry out of the top word, and z[9] is always set to 0.
 * y and z may point to the same array: each input word is read before the
 * output word with the same index is stored.
 */
static inline void multiply_977(uint32_t *y, uint32_t *z) {
    const uint32_t factor = 977;
    uint64_t carry = 0;
    for (int limb = 0; limb < 8; ++limb) {
        /* 977 * (2^32 - 1) + carry stays far below 2^64, so this cannot wrap. */
        const uint64_t acc = carry + factor * (uint64_t)y[limb];
        z[limb] = (uint32_t)acc;
        carry = acc >> 32;
    }
    z[8] = (uint32_t)carry;
    z[9] = 0;
}
/*
 * z_ = y_ * 977, processing the 256-bit input 64 bits at a time with a
 * 128-bit intermediate product (__uint128_t is a GCC/Clang extension).
 *
 * y_ holds eight 32-bit words, least significant word first. Ten words are
 * written to z_: z_[0..7] is the low 256 bits of the product, z_[8] is the
 * low 32 bits of the final carry, and z_[9] is always 0.
 * y_ and z_ may point to the same array: both words of a 64-bit limb are read
 * before the matching output words are stored.
 *
 * Each 64-bit limb is assembled from, and split back into, two uint32_t
 * words explicitly. Accessing the uint32_t array through a uint64_t* is a
 * strict-aliasing violation (undefined behaviour, and possibly misaligned),
 * which lets an optimising compiler drop the stores.
 */
static inline void multiply_977_2(uint32_t *y_, uint32_t *z_) {
    const uint32_t x = 977;
    uint64_t high = 0;
    for (int i = 0; i < 4; ++i) {
        const uint64_t limb =
            (uint64_t)y_[2 * i] | ((uint64_t)y_[2 * i + 1] << 32);
        /* 977 * (2^64 - 1) + high stays far below 2^128, so this cannot wrap. */
        const __uint128_t prod = high + x * (__uint128_t)limb;
        z_[2 * i] = (uint32_t)prod;
        z_[2 * i + 1] = (uint32_t)(prod >> 32);
        high = (uint64_t)(prod >> 64);
    }
    z_[8] = (uint32_t)high;
    z_[9] = 0;
}
/*
 * Starts both operands at 1, multiplies each by 977 one hundred times in
 * place (one operand per implementation), then prints the low eight words of
 * each result on its own line so the two implementations can be compared.
 */
int main(int argc, char** argv) {
    /* Ten words per operand: both multipliers store words 0..9 of their output. */
    uint32_t a[10] = {1};
    uint32_t b[10] = {1};
    int remaining = 100;
    while (remaining-- > 0) {
        multiply_977(a, a);
        multiply_977_2(b, b);
    }
    for (const uint32_t *w = a; w != a + 8; ++w) {
        printf("%08x ", *w);
    }
    printf("\n");
    for (const uint32_t *w = b; w != b + 8; ++w) {
        printf("%08x ", *w);
    }
    printf("\n");
    return 0;
}
output with and without -O3, results are expected to be identical:
khaotik#KKST2:~/tmp$ gcc a.c; ./a.out
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
khaotik#KKST2:~/tmp$ gcc -O3 a.c; ./a.out
2372c341 af466dcc 57f3a318 7ce73fd9 cd8f973d 81dc6c7f 84637b1d f0de09cd
00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000
With -O3, gcc generates SSE code:
00000000000010a0 <main>:
10a0: f3 0f 1e fa endbr64
10a4: 41 54 push %r12
10a6: 66 0f ef c0 pxor %xmm0,%xmm0
10aa: b9 64 00 00 00 mov $0x64,%ecx
10af: 45 31 c9 xor %r9d,%r9d
10b2: 55 push %rbp
10b3: 31 f6 xor %esi,%esi
10b5: 31 ed xor %ebp,%ebp
10b7: 45 31 db xor %r11d,%r11d
10ba: 53 push %rbx
10bb: 45 31 c0 xor %r8d,%r8d
10be: 31 db xor %ebx,%ebx
10c0: 45 31 d2 xor %r10d,%r10d
10c3: bf 01 00 00 00 mov $0x1,%edi
10c8: 48 83 ec 60 sub $0x60,%rsp
10cc: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
10d3: 00 00
10d5: 48 89 44 24 58 mov %rax,0x58(%rsp)
10da: 31 c0 xor %eax,%eax
10dc: 0f 29 44 24 30 movaps %xmm0,0x30(%rsp)
10e1: c7 44 24 24 00 00 00 movl $0x0,0x24(%rsp)
10e8: 00
10e9: 48 c7 44 24 50 00 00 movq $0x0,0x50(%rsp)
10f0: 00 00
10f2: c7 44 24 30 01 00 00 movl $0x1,0x30(%rsp)
10f9: 00
10fa: 0f 29 44 24 40 movaps %xmm0,0x40(%rsp)
10ff: 90 nop
1100: 89 f8 mov %edi,%eax
1102: 4d 69 c9 d1 03 00 00 imul $0x3d1,%r9,%r9
1109: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1110: 89 c7 mov %eax,%edi
1112: 48 c1 e8 20 shr $0x20,%rax
1116: 48 89 c2 mov %rax,%rdx
1119: 44 89 d0 mov %r10d,%eax
111c: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1123: 48 01 d0 add %rdx,%rax
1126: 41 89 c2 mov %eax,%r10d
1129: 48 c1 e8 20 shr $0x20,%rax
112d: 48 89 c2 mov %rax,%rdx
1130: 44 89 c0 mov %r8d,%eax
1133: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
113a: 48 01 d0 add %rdx,%rax
113d: 41 89 c0 mov %eax,%r8d
1140: 48 c1 e8 20 shr $0x20,%rax
1144: 48 89 c2 mov %rax,%rdx
1147: 44 89 d8 mov %r11d,%eax
114a: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1151: 48 01 d0 add %rdx,%rax
1154: 41 89 c3 mov %eax,%r11d
1157: 48 c1 e8 20 shr $0x20,%rax
115b: 48 89 c2 mov %rax,%rdx
115e: 89 f0 mov %esi,%eax
1160: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1167: 48 01 d0 add %rdx,%rax
116a: 89 c6 mov %eax,%esi
116c: 48 c1 e8 20 shr $0x20,%rax
1170: 48 89 c2 mov %rax,%rdx
1173: 89 e8 mov %ebp,%eax
1175: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
117c: 48 01 d0 add %rdx,%rax
117f: 89 c5 mov %eax,%ebp
1181: 48 c1 e8 20 shr $0x20,%rax
1185: 48 89 c2 mov %rax,%rdx
1188: 89 d8 mov %ebx,%eax
118a: 48 69 c0 d1 03 00 00 imul $0x3d1,%rax,%rax
1191: 48 01 d0 add %rdx,%rax
1194: 89 c3 mov %eax,%ebx
1196: 48 c1 e8 20 shr $0x20,%rax
119a: 4c 01 c8 add %r9,%rax
119d: 41 89 c1 mov %eax,%r9d
11a0: 83 e9 01 sub $0x1,%ecx
11a3: 0f 85 57 ff ff ff jne 1100 <main+0x60>
11a9: 66 41 0f 6e c8 movd %r8d,%xmm1
11ae: 66 41 0f 6e d3 movd %r11d,%xmm2
11b3: 66 0f 6e c7 movd %edi,%xmm0
11b7: 48 c1 e8 20 shr $0x20,%rax
11bb: 66 41 0f 6e da movd %r10d,%xmm3
11c0: 66 0f 62 ca punpckldq %xmm2,%xmm1
11c4: 66 0f 6e ed movd %ebp,%xmm5
11c8: 89 44 24 20 mov %eax,0x20(%rsp)
11cc: 66 0f 62 c3 punpckldq %xmm3,%xmm0
11d0: 66 41 0f 6e e1 movd %r9d,%xmm4
11d5: 4c 8d 64 24 20 lea 0x20(%rsp),%r12
11da: 66 0f 6c c1 punpcklqdq %xmm1,%xmm0
11de: 66 0f 6e cb movd %ebx,%xmm1
11e2: 48 8d 2d 1b 0e 00 00 lea 0xe1b(%rip),%rbp # 2004 <_IO_stdin_used+0x4>
11e9: 48 89 e3 mov %rsp,%rbx
11ec: 0f 29 04 24 movaps %xmm0,(%rsp)
11f0: 66 0f 6e c6 movd %esi,%xmm0
11f4: 66 0f 62 cc punpckldq %xmm4,%xmm1
11f8: 66 0f 62 c5 punpckldq %xmm5,%xmm0
11fc: 66 0f 6c c1 punpcklqdq %xmm1,%xmm0
1200: 0f 29 44 24 10 movaps %xmm0,0x10(%rsp)
1205: 0f 1f 00 nopl (%rax)
1208: 8b 13 mov (%rbx),%edx
120a: 48 89 ee mov %rbp,%rsi
120d: bf 01 00 00 00 mov $0x1,%edi
1212: 31 c0 xor %eax,%eax
1214: 48 83 c3 04 add $0x4,%rbx
1218: e8 73 fe ff ff callq 1090 <__printf_chk#plt>
121d: 49 39 dc cmp %rbx,%r12
1220: 75 e6 jne 1208 <main+0x168>
1222: bf 0a 00 00 00 mov $0xa,%edi
1227: 48 8d 5c 24 30 lea 0x30(%rsp),%rbx
122c: 4c 8d 64 24 50 lea 0x50(%rsp),%r12
1231: e8 3a fe ff ff callq 1070 <putchar#plt>
1236: 48 8d 2d c7 0d 00 00 lea 0xdc7(%rip),%rbp # 2004 <_IO_stdin_used+0x4>
123d: 0f 1f 00 nopl (%rax)
1240: 8b 13 mov (%rbx),%edx
1242: 48 89 ee mov %rbp,%rsi
1245: bf 01 00 00 00 mov $0x1,%edi
124a: 31 c0 xor %eax,%eax
124c: 48 83 c3 04 add $0x4,%rbx
1250: e8 3b fe ff ff callq 1090 <__printf_chk#plt>
1255: 49 39 dc cmp %rbx,%r12
1258: 75 e6 jne 1240 <main+0x1a0>
125a: bf 0a 00 00 00 mov $0xa,%edi
125f: e8 0c fe ff ff callq 1070 <putchar#plt>
1264: 48 8b 44 24 58 mov 0x58(%rsp),%rax
1269: 64 48 33 04 25 28 00 xor %fs:0x28,%rax
1270: 00 00
1272: 75 0b jne 127f <main+0x1df>
1274: 48 83 c4 60 add $0x60,%rsp
1278: 31 c0 xor %eax,%eax
127a: 5b pop %rbx
127b: 5d pop %rbp
127c: 41 5c pop %r12
127e: c3 retq
127f: e8 fc fd ff ff callq 1080 <__stack_chk_fail#plt>
1284: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
128b: 00 00 00
128e: 66 90 xchg %ax,%ax
However I'm not asm-savvy enough to figure out the problem.
compiler version and cpu flags:
khaotik#KKST2:~/tmp$ gcc --version
gcc (Ubuntu 9.3.0-10ubuntu2) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
khaotik#KKST2:~/tmp$ lscpu | grep Flags
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault invpcid_single pti ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx rdseed adx smap clflushopt intel_pt xsaveopt xsavec xgetbv1 xsaves dtherm ida arat pln pts hwp hwp_notify hwp_act_window hwp_epp md_clear flush_l1d
I'm working on an assignment for class where I have to interpret assembly. I know the input to defuse the bomb is 442, but I'm not exactly sure why.
8048c80: 83 ec 2c sub $0x2c,%esp
8048c83: c7 44 24 1c 00 00 00 movl $0x0,0x1c(%esp)
8048c8a: 00
8048c8b: 8d 44 24 1c lea 0x1c(%esp),%eax
8048c8f: 89 44 24 08 mov %eax,0x8(%esp)
8048c93: c7 44 24 04 64 a7 04 movl $0x804a764,0x4(%esp)
8048c9a: 08
8048c9b: 8b 44 24 30 mov 0x30(%esp),%eax
8048c9f: 89 04 24 mov %eax,(%esp)
8048ca2: e8 59 fc ff ff call 8048900 <__isoc99_sscanf#plt>
8048ca7: 83 f8 01 cmp $0x1,%eax
8048caa: 74 05 je 8048cb1 <phase_1+0x31>
8048cac: e8 e4 07 00 00 call 8049495 <explode_bomb>
8048cb1: 81 7c 24 1c ba 01 00 cmpl $0x1ba,0x1c(%esp)
8048cb8: 00
8048cb9: 74 05 je 8048cc0 <phase_1+0x40>
8048cbb: e8 d5 07 00 00 call 8049495 <explode_bomb>
8048cc0: 83 c4 2c add $0x2c,%esp
8048cc3: c3 ret
Sscanf takes two values, "%d" and my inputted value, but I'm not sure where it stores the value or why %eax is 1 or why 0x1c(%esp) has the value. We store 0x0 there at the beginning, and then move 0x30(%esp), %eax, so shouldn't it be 0? Any help understanding this would be very much appreciated.
To be clear, this is x86 in at&t syntax.
I am currently working on phase 2 of the binary bomb assignment. I'm having trouble deciphering exactly what a certain function does when called. I've been stuck on it for days.
The function is:
0000000000400f2a <func2a>:
400f2a: 85 ff test %edi,%edi
400f2c: 74 1d je 400f4b <func2a+0x21>
400f2e: b9 cd cc cc cc mov $0xcccccccd,%ecx
400f33: 89 f8 mov %edi,%eax
400f35: f7 e1 mul %ecx
400f37: c1 ea 03 shr $0x3,%edx
400f3a: 8d 04 92 lea (%rdx,%rdx,4),%eax
400f3d: 01 c0 add %eax,%eax
400f3f: 29 c7 sub %eax,%edi
400f41: 83 04 be 01 addl $0x1,(%rsi,%rdi,4)
400f45: 89 d7 mov %edx,%edi
400f47: 85 d2 test %edx,%edx
400f49: 75 e8 jne 400f33 <func2a+0x9>
400f4b: f3 c3 repz retq
It gets called in the larger function "phase_2":
0000000000400f4d <phase_2>:
400f4d: 53 push %rbx
400f4e: 48 83 ec 60 sub $0x60,%rsp
400f52: 48 c7 44 24 30 00 00 movq $0x0,0x30(%rsp)
400f59: 00 00
400f5b: 48 c7 44 24 38 00 00 movq $0x0,0x38(%rsp)
400f62: 00 00
400f64: 48 c7 44 24 40 00 00 movq $0x0,0x40(%rsp)
400f6b: 00 00
400f6d: 48 c7 44 24 48 00 00 movq $0x0,0x48(%rsp)
400f74: 00 00
400f76: 48 c7 44 24 50 00 00 movq $0x0,0x50(%rsp)
400f7d: 00 00
400f7f: 48 c7 04 24 00 00 00 movq $0x0,(%rsp)
400f86: 00
400f87: 48 c7 44 24 08 00 00 movq $0x0,0x8(%rsp)
400f8e: 00 00
400f90: 48 c7 44 24 10 00 00 movq $0x0,0x10(%rsp)
400f97: 00 00
400f99: 48 c7 44 24 18 00 00 movq $0x0,0x18(%rsp)
400fa0: 00 00
400fa2: 48 c7 44 24 20 00 00 movq $0x0,0x20(%rsp)
400fa9: 00 00
400fab: 48 8d 4c 24 58 lea 0x58(%rsp),%rcx
400fb0: 48 8d 54 24 5c lea 0x5c(%rsp),%rdx
400fb5: be 9e 26 40 00 mov $0x40269e,%esi
400fba: b8 00 00 00 00 mov $0x0,%eax
400fbf: e8 6c fc ff ff callq 400c30 <__isoc99_sscanf#plt>
400fc4: 83 f8 02 cmp $0x2,%eax
400fc7: 74 05 je 400fce <phase_2+0x81>
400fc9: e8 c1 06 00 00 callq 40168f <explode_bomb>
400fce: 83 7c 24 5c 64 cmpl $0x64,0x5c(%rsp)
400fd3: 76 07 jbe 400fdc <phase_2+0x8f>
400fd5: 83 7c 24 58 64 cmpl $0x64,0x58(%rsp)
400fda: 77 05 ja 400fe1 <phase_2+0x94>
400fdc: e8 ae 06 00 00 callq 40168f <explode_bomb>
400fe1: 48 8d 74 24 30 lea 0x30(%rsp),%rsi
400fe6: 8b 7c 24 5c mov 0x5c(%rsp),%edi
400fea: e8 3b ff ff ff callq 400f2a <func2a>
400fef: 48 89 e6 mov %rsp,%rsi
400ff2: 8b 7c 24 58 mov 0x58(%rsp),%edi
400ff6: e8 2f ff ff ff callq 400f2a <func2a>
400ffb: bb 00 00 00 00 mov $0x0,%ebx
401000: 8b 04 1c mov (%rsp,%rbx,1),%eax
401003: 39 44 1c 30 cmp %eax,0x30(%rsp,%rbx,1)
401007: 74 05 je 40100e <phase_2+0xc1>
401009: e8 81 06 00 00 callq 40168f <explode_bomb>
40100e: 48 83 c3 04 add $0x4,%rbx
401012: 48 83 fb 28 cmp $0x28,%rbx
401016: 75 e8 jne 401000 <phase_2+0xb3>
401018: 48 83 c4 60 add $0x60,%rsp
40101c: 5b pop %rbx
40101d: c3 retq
I completely understand what phase_2 is doing, I just don't understand what func2a is doing and how it affects the values at 0x30(%rsp) and so on. Because of this I always get to the comparison statement at 0x401003, and the bomb eventually explodes there.
My problem is I don't understand how the input (phase solution) is affecting the values at 0x30(%rsp) via func2a.
400f2a: 85 ff test %edi,%edi
400f2c: 74 1d je 400f4b <func2a+0x21>
This is just an early exit for when edi is zero (je is the same as jz).
400f2e: b9 cd cc cc cc mov $0xcccccccd,%ecx
400f33: 89 f8 mov %edi,%eax
400f35: f7 e1 mul %ecx
400f37: c1 ea 03 shr $0x3,%edx
This is a classic optimization trick; it is the integer arithmetic equivalent of dividing by multiplying by the inverse (see here for details); in practice, here it's the same as saying edx = edi / 10;
400f3a: 8d 04 92 lea (%rdx,%rdx,4),%eax
400f3d: 01 c0 add %eax,%eax
Here it is exploiting lea to perform arithmetic (and it's way clearer in Intel syntax, where it is lea eax,[rdx+rdx*4] => eax = edx*5), then sums the result with itself. It all boils down to eax = edx*10.
400f3f: 29 c7 sub %eax,%edi
Then, subtract that value from edi, leaving edi = edi - (edi / 10) * 10.
So, all in all this is a complicated (but fast) way to compute the last decimal digit of edi; what we have until now is something like:
// Partial reconstruction of func2a. Variables are named after the registers
// they stand for; edx is left undeclared in this sketch.
void func2a(unsigned edi) {
if(edi==0) return; // early exit when the argument is zero
label1: // corresponds to the jump target at 400f33
edx=edi/10; // quotient; the assembly gets it by multiplying by a reciprocal constant
edi%=10; // edi now holds the last decimal digit
// ...
}
(label1: is there because 400f33 is a jump target later)
Going on:
400f41: 83 04 be 01 addl $0x1,(%rsi,%rdi,4)
Again, this is way clearer to me in Intel syntax - add dword [rsi+rdi*4],byte +0x1. It is a regular increment into an array of 32-bit int (rdi is multiplied by 4); so, we can imagine that rsi points to an array of integers, indexed with the just-calculated last digit of edi.
// Same sketch extended with the second argument: rsi is treated as an array
// of 32-bit ints. The jump back to label1 is not shown yet, and edx is still
// undeclared.
void func2a(unsigned edi, int rsi[]) {
if(edi==0) return; // early exit when the argument is zero
label1: // corresponds to the jump target at 400f33
edx=edi/10; // quotient, kept for the next iteration
edi%=10; // last decimal digit
rsi[edi]++; // addl $0x1,(%rsi,%rdi,4): increment the bucket for that digit
}
Then:
400f45: 89 d7 mov %edx,%edi
400f47: 85 d2 test %edx,%edx
400f49: 75 e8 jne 400f33 <func2a+0x9>
Move the result of the division we calculated above to edi, and loop if it's different from zero.
400f4b: f3 c3 repz retq
Return (using an unusual encoding of the instruction that is optimal for certain AMD processors).
So, by rewriting the jumps with a while loop and giving some meaningful names...
/*
 * Tallies the decimal digits of `number`: for every digit d of the value,
 * digits_count[d] is incremented once. A value of 0 leaves the array
 * untouched. Indices 0..9 of digits_count may be written.
 *
 * Under the x64 System V calling convention `number` arrives in edi and
 * `digits_count` in rsi.
 */
void count_digits(unsigned number, int digits_count[]) {
    for (unsigned rest = number; rest != 0; rest /= 10) {
        const unsigned digit = rest % 10;
        digits_count[digit] += 1;
    }
}
I.e., this is a function that, given an integer, counts the occurrences of the single decimal digits, by incrementing the corresponding buckets in the digits_count array.
Fun fact: if we give the C code above to gcc (almost any recent version at -O1) we obtain back exactly the assembly you provided.
The following is my C file:
/* Minimal program: main does nothing except return the value 36. */
int main()
{
return 36;
}
It contains only return statement. But if I use the size command, it shows the
output like this:
mohanraj#ltsp63:~/Development/chap8$ size a.out
text data bss dec hex filename
1056 252 8 1316 524 a.out
mohanraj#ltsp63:~/Development/chap8$
My program does not contain any global variables or uninitialized data, yet the output shows that the data segment has 252 bytes and the bss has 8 bytes. Why is the output like this? What do the 252 and the 8 refer to?
Size Command
First see the definition of each column:
text - Actual machine instructions that your CPU going to execute. Linux allows to share this data.
data - All initialized variables (declarations) declared in a program (e.g., float salary=123.45;).
bss - The BSS consists of uninitialized data such as arrays that you have not set any values to or null pointers.
As Blue Moon said, on Linux execution starts by calling the _start() function, which performs the environment setup. Every C program is linked with hidden startup code and "libraries" that depend on the compiler you are using. They set up global parameters and exit handling, and only after that configuration is complete is your main() function finally called.
AFAIK there's no way to see how your code looks once it is wrapped with this configuration code and the _start() function. But I can show you that, the closer we get to the hardware, even your code contains more information than you thought.
Hint:
Type readelf -a a.out to see how much information your exec really carrying.
What is inside?
Do not compare the code in your source file with the size of the executable file; the size depends on the OS, the compiler, and the libraries used.
In my example, with exactly the same code, SIZE returns:
eryk#eryk-pc:~$ gcc a.c
eryk#eryk-pc:~$ size a.out
text data bss dec hex filename
1033 276 4 1313 521 a.out
Let's see what is inside...
eryk#eryk-pc:~$ gcc -S a.c
This will run the preprocessor over a.c, perform the initial compilation and then stop before the assembler is run.
eryk#eryk-pc:~$ cat a.s
.file "a.c"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
movl $36, %eax
popl %ebp
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",@progbits
Then look on the assembly code
eryk#eryk-pc:~$ objdump -d -M intel -S a.out
a.out: file format elf32-i386
Disassembly of section .init:
08048294 <_init>:
8048294: 53 push ebx
8048295: 83 ec 08 sub esp,0x8
8048298: e8 83 00 00 00 call 8048320 <__x86.get_pc_thunk.bx>
804829d: 81 c3 63 1d 00 00 add ebx,0x1d63
80482a3: 8b 83 fc ff ff ff mov eax,DWORD PTR [ebx-0x4]
80482a9: 85 c0 test eax,eax
80482ab: 74 05 je 80482b2 <_init+0x1e>
80482ad: e8 1e 00 00 00 call 80482d0 <__gmon_start__#plt>
80482b2: 83 c4 08 add esp,0x8
80482b5: 5b pop ebx
80482b6: c3 ret
Disassembly of section .plt:
080482c0 <__gmon_start__#plt-0x10>:
80482c0: ff 35 04 a0 04 08 push DWORD PTR ds:0x804a004
80482c6: ff 25 08 a0 04 08 jmp DWORD PTR ds:0x804a008
80482cc: 00 00 add BYTE PTR [eax],al
...
080482d0 <__gmon_start__#plt>:
80482d0: ff 25 0c a0 04 08 jmp DWORD PTR ds:0x804a00c
80482d6: 68 00 00 00 00 push 0x0
80482db: e9 e0 ff ff ff jmp 80482c0 <_init+0x2c>
080482e0 <__libc_start_main#plt>:
80482e0: ff 25 10 a0 04 08 jmp DWORD PTR ds:0x804a010
80482e6: 68 08 00 00 00 push 0x8
80482eb: e9 d0 ff ff ff jmp 80482c0 <_init+0x2c>
Disassembly of section .text:
080482f0 <_start>:
80482f0: 31 ed xor ebp,ebp
80482f2: 5e pop esi
80482f3: 89 e1 mov ecx,esp
80482f5: 83 e4 f0 and esp,0xfffffff0
80482f8: 50 push eax
80482f9: 54 push esp
80482fa: 52 push edx
80482fb: 68 70 84 04 08 push 0x8048470
8048300: 68 00 84 04 08 push 0x8048400
8048305: 51 push ecx
8048306: 56 push esi
8048307: 68 ed 83 04 08 push 0x80483ed
804830c: e8 cf ff ff ff call 80482e0 <__libc_start_main#plt>
8048311: f4 hlt
8048312: 66 90 xchg ax,ax
8048314: 66 90 xchg ax,ax
8048316: 66 90 xchg ax,ax
8048318: 66 90 xchg ax,ax
804831a: 66 90 xchg ax,ax
804831c: 66 90 xchg ax,ax
804831e: 66 90 xchg ax,ax
08048320 <__x86.get_pc_thunk.bx>:
8048320: 8b 1c 24 mov ebx,DWORD PTR [esp]
8048323: c3 ret
8048324: 66 90 xchg ax,ax
8048326: 66 90 xchg ax,ax
8048328: 66 90 xchg ax,ax
804832a: 66 90 xchg ax,ax
804832c: 66 90 xchg ax,ax
804832e: 66 90 xchg ax,ax
08048330 <deregister_tm_clones>:
8048330: b8 1f a0 04 08 mov eax,0x804a01f
8048335: 2d 1c a0 04 08 sub eax,0x804a01c
804833a: 83 f8 06 cmp eax,0x6
804833d: 77 01 ja 8048340 <deregister_tm_clones+0x10>
804833f: c3 ret
8048340: b8 00 00 00 00 mov eax,0x0
8048345: 85 c0 test eax,eax
8048347: 74 f6 je 804833f <deregister_tm_clones+0xf>
8048349: 55 push ebp
804834a: 89 e5 mov ebp,esp
804834c: 83 ec 18 sub esp,0x18
804834f: c7 04 24 1c a0 04 08 mov DWORD PTR [esp],0x804a01c
8048356: ff d0 call eax
8048358: c9 leave
8048359: c3 ret
804835a: 8d b6 00 00 00 00 lea esi,[esi+0x0]
08048360 <register_tm_clones>:
8048360: b8 1c a0 04 08 mov eax,0x804a01c
8048365: 2d 1c a0 04 08 sub eax,0x804a01c
804836a: c1 f8 02 sar eax,0x2
804836d: 89 c2 mov edx,eax
804836f: c1 ea 1f shr edx,0x1f
8048372: 01 d0 add eax,edx
8048374: d1 f8 sar eax,1
8048376: 75 01 jne 8048379 <register_tm_clones+0x19>
8048378: c3 ret
8048379: ba 00 00 00 00 mov edx,0x0
804837e: 85 d2 test edx,edx
8048380: 74 f6 je 8048378 <register_tm_clones+0x18>
8048382: 55 push ebp
8048383: 89 e5 mov ebp,esp
8048385: 83 ec 18 sub esp,0x18
8048388: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
804838c: c7 04 24 1c a0 04 08 mov DWORD PTR [esp],0x804a01c
8048393: ff d2 call edx
8048395: c9 leave
8048396: c3 ret
8048397: 89 f6 mov esi,esi
8048399: 8d bc 27 00 00 00 00 lea edi,[edi+eiz*1+0x0]
080483a0 <__do_global_dtors_aux>:
80483a0: 80 3d 1c a0 04 08 00 cmp BYTE PTR ds:0x804a01c,0x0
80483a7: 75 13 jne 80483bc <__do_global_dtors_aux+0x1c>
80483a9: 55 push ebp
80483aa: 89 e5 mov ebp,esp
80483ac: 83 ec 08 sub esp,0x8
80483af: e8 7c ff ff ff call 8048330 <deregister_tm_clones>
80483b4: c6 05 1c a0 04 08 01 mov BYTE PTR ds:0x804a01c,0x1
80483bb: c9 leave
80483bc: f3 c3 repz ret
80483be: 66 90 xchg ax,ax
080483c0 <frame_dummy>:
80483c0: a1 10 9f 04 08 mov eax,ds:0x8049f10
80483c5: 85 c0 test eax,eax
80483c7: 74 1f je 80483e8 <frame_dummy+0x28>
80483c9: b8 00 00 00 00 mov eax,0x0
80483ce: 85 c0 test eax,eax
80483d0: 74 16 je 80483e8 <frame_dummy+0x28>
80483d2: 55 push ebp
80483d3: 89 e5 mov ebp,esp
80483d5: 83 ec 18 sub esp,0x18
80483d8: c7 04 24 10 9f 04 08 mov DWORD PTR [esp],0x8049f10
80483df: ff d0 call eax
80483e1: c9 leave
80483e2: e9 79 ff ff ff jmp 8048360 <register_tm_clones>
80483e7: 90 nop
80483e8: e9 73 ff ff ff jmp 8048360 <register_tm_clones>
080483ed <main>:
80483ed: 55 push ebp
80483ee: 89 e5 mov ebp,esp
80483f0: b8 24 00 00 00 mov eax,0x24
80483f5: 5d pop ebp
80483f6: c3 ret
80483f7: 66 90 xchg ax,ax
80483f9: 66 90 xchg ax,ax
80483fb: 66 90 xchg ax,ax
80483fd: 66 90 xchg ax,ax
80483ff: 90 nop
08048400 <__libc_csu_init>:
8048400: 55 push ebp
8048401: 57 push edi
8048402: 31 ff xor edi,edi
8048404: 56 push esi
8048405: 53 push ebx
8048406: e8 15 ff ff ff call 8048320 <__x86.get_pc_thunk.bx>
804840b: 81 c3 f5 1b 00 00 add ebx,0x1bf5
8048411: 83 ec 1c sub esp,0x1c
8048414: 8b 6c 24 30 mov ebp,DWORD PTR [esp+0x30]
8048418: 8d b3 0c ff ff ff lea esi,[ebx-0xf4]
804841e: e8 71 fe ff ff call 8048294 <_init>
8048423: 8d 83 08 ff ff ff lea eax,[ebx-0xf8]
8048429: 29 c6 sub esi,eax
804842b: c1 fe 02 sar esi,0x2
804842e: 85 f6 test esi,esi
8048430: 74 27 je 8048459 <__libc_csu_init+0x59>
8048432: 8d b6 00 00 00 00 lea esi,[esi+0x0]
8048438: 8b 44 24 38 mov eax,DWORD PTR [esp+0x38]
804843c: 89 2c 24 mov DWORD PTR [esp],ebp
804843f: 89 44 24 08 mov DWORD PTR [esp+0x8],eax
8048443: 8b 44 24 34 mov eax,DWORD PTR [esp+0x34]
8048447: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
804844b: ff 94 bb 08 ff ff ff call DWORD PTR [ebx+edi*4-0xf8]
8048452: 83 c7 01 add edi,0x1
8048455: 39 f7 cmp edi,esi
8048457: 75 df jne 8048438 <__libc_csu_init+0x38>
8048459: 83 c4 1c add esp,0x1c
804845c: 5b pop ebx
804845d: 5e pop esi
804845e: 5f pop edi
804845f: 5d pop ebp
8048460: c3 ret
8048461: eb 0d jmp 8048470 <__libc_csu_fini>
8048463: 90 nop
8048464: 90 nop
8048465: 90 nop
8048466: 90 nop
8048467: 90 nop
8048468: 90 nop
8048469: 90 nop
804846a: 90 nop
804846b: 90 nop
804846c: 90 nop
804846d: 90 nop
804846e: 90 nop
804846f: 90 nop
08048470 <__libc_csu_fini>:
8048470: f3 c3 repz ret
Disassembly of section .fini:
08048474 <_fini>:
8048474: 53 push ebx
8048475: 83 ec 08 sub esp,0x8
8048478: e8 a3 fe ff ff call 8048320 <__x86.get_pc_thunk.bx>
804847d: 81 c3 83 1b 00 00 add ebx,0x1b83
8048483: 83 c4 08 add esp,0x8
8048486: 5b pop ebx
8048487: c3 ret
The next step would be converting the above code to binary (0/1) notation.
As you can see, even a simple C program involves complicated operations the closer your code gets to the hardware. I hope I have explained why the executable file is bigger than you thought. If you have any doubts, feel free to comment on my post and I will edit my answer.
I have an inner function in a larger program that is somehow changing a float value to "nan" when I expect it to be zero. I have trimmed the function down to the simplest form, with no parameters:
/*
 * Trimmed-down reproduction: prints x, adds (float)a * v to it, prints x
 * again, then prints x converted to int. All operands are local values
 * initialised in the function (a = 1, x = v = 0.0f).
 */
static void func(void)
{
int a = 1;
float x = 0.0f;
float v = 0.0f;
printf("x(%f), ", x);
x += (float)a * v; /* with the initialisers above: 0.0f += 1.0f * 0.0f */
printf("x(%f), ", x);
printf("(int)x: %d, ", (int)x);
}
This gives the output:
x(0.000000), x(nan), (int)x: -2147483648
If I remove the variable a and hardcode the value (1), the nan value goes away. Similarly, if I remove the line x += (float)a * v;, everything prints as expected (all zeroes).
The frustrating part is that I can't reproduce this by just creating a new program and tossing this in main(). When I try that, the program works perfectly and outputs:
x(0.000000), x(0.000000), (int)x: 0
Disassembly from the function in the actual program:
00000029 <_func>:
29: 55 push %ebp
2a: 89 e5 mov %esp,%ebp
2c: 83 ec 38 sub $0x38,%esp
2f: c7 45 f4 01 00 00 00 movl $0x1,-0xc(%ebp)
36: a1 18 00 00 00 mov 0x18,%eax
3b: 89 45 f0 mov %eax,-0x10(%ebp)
3e: a1 18 00 00 00 mov 0x18,%eax
43: 89 45 ec mov %eax,-0x14(%ebp)
46: d9 45 f0 flds -0x10(%ebp)
49: dd 5c 24 04 fstpl 0x4(%esp)
4d: c7 04 24 00 00 00 00 movl $0x0,(%esp)
54: e8 a7 ff ff ff call 0 <_printf>
59: d9 45 f0 flds -0x10(%ebp)
5c: db 45 f4 fildl -0xc(%ebp)
5f: d9 5d e4 fstps -0x1c(%ebp)
62: d9 45 e4 flds -0x1c(%ebp)
65: d9 45 ec flds -0x14(%ebp)
68: de c9 fmulp %st,%st(1)
6a: de c1 faddp %st,%st(1)
6c: d9 5d f0 fstps -0x10(%ebp)
6f: d9 45 f0 flds -0x10(%ebp)
72: dd 5c 24 04 fstpl 0x4(%esp)
76: c7 04 24 00 00 00 00 movl $0x0,(%esp)
7d: e8 7e ff ff ff call 0 <_printf>
82: d9 45 f0 flds -0x10(%ebp)
85: d9 7d e2 fnstcw -0x1e(%ebp)
88: 0f b7 45 e2 movzwl -0x1e(%ebp),%eax
8c: b4 0c mov $0xc,%ah
8e: 66 89 45 e0 mov %ax,-0x20(%ebp)
92: d9 6d e0 fldcw -0x20(%ebp)
95: db 5d dc fistpl -0x24(%ebp)
98: d9 6d e2 fldcw -0x1e(%ebp)
9b: 8b 45 dc mov -0x24(%ebp),%eax
9e: 89 44 24 04 mov %eax,0x4(%esp)
a2: c7 04 24 08 00 00 00 movl $0x8,(%esp)
a9: e8 52 ff ff ff call 0 <_printf>
ae: c9 leave
af: c3 ret
Disassembly from stand-alone function (as main()):
00000000 <_main>:
0: 55 push %ebp
1: 89 e5 mov %esp,%ebp
3: 83 e4 f0 and $0xfffffff0,%esp
6: 83 ec 30 sub $0x30,%esp
9: e8 00 00 00 00 call e <_main+0xe>
e: c7 44 24 2c 01 00 00 movl $0x1,0x2c(%esp)
15: 00
16: a1 18 00 00 00 mov 0x18,%eax
1b: 89 44 24 28 mov %eax,0x28(%esp)
1f: a1 18 00 00 00 mov 0x18,%eax
24: 89 44 24 24 mov %eax,0x24(%esp)
28: d9 44 24 28 flds 0x28(%esp)
2c: dd 5c 24 04 fstpl 0x4(%esp)
30: c7 04 24 00 00 00 00 movl $0x0,(%esp)
37: e8 00 00 00 00 call 3c <_main+0x3c>
3c: db 44 24 2c fildl 0x2c(%esp)
40: d8 4c 24 24 fmuls 0x24(%esp)
44: d9 44 24 28 flds 0x28(%esp)
48: de c1 faddp %st,%st(1)
4a: d9 5c 24 28 fstps 0x28(%esp)
4e: d9 44 24 28 flds 0x28(%esp)
52: dd 5c 24 04 fstpl 0x4(%esp)
56: c7 04 24 00 00 00 00 movl $0x0,(%esp)
5d: e8 00 00 00 00 call 62 <_main+0x62>
62: d9 44 24 28 flds 0x28(%esp)
66: d9 7c 24 1e fnstcw 0x1e(%esp)
6a: 0f b7 44 24 1e movzwl 0x1e(%esp),%eax
6f: b4 0c mov $0xc,%ah
71: 66 89 44 24 1c mov %ax,0x1c(%esp)
76: d9 6c 24 1c fldcw 0x1c(%esp)
7a: db 5c 24 18 fistpl 0x18(%esp)
7e: d9 6c 24 1e fldcw 0x1e(%esp)
82: 8b 44 24 18 mov 0x18(%esp),%eax
86: 89 44 24 04 mov %eax,0x4(%esp)
8a: c7 04 24 08 00 00 00 movl $0x8,(%esp)
91: e8 00 00 00 00 call 96 <_main+0x96>
96: b8 00 00 00 00 mov $0x0,%eax
9b: c9 leave
9c: c3 ret
9d: 90 nop
9e: 90 nop
9f: 90 nop
This issue is often the result of undefined behavior. In this specific instance, there was an implicit function declaration (a header file hadn't been included elsewhere in the program) which caused UB, and resulted in this bug.