Disassembling C code [closed] - c

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 8 years ago.
Improve this question
How is it possible to disassemble C code? I have already read a few questions here (Stack Overflow). But to disassemble something you need machine code, so how do I do this with nasm? For example, if I create a Hello World program in C, how can I disassemble it?

Nasm is a bad idea. There are a few options. IDA Pro has given me some success, but if you really know your assembly, you can use nm to find the symbols, then hexdump the code from there and manually make assembly out of it. There really isn't a way to use nasm to produce recompilable code, though.
otool (or objdump) will produce assembly.
If you need some examples: here:
#include <stdio.h>
/*
 * Minimal "hello world" used as the disassembly example.
 *
 * Deliberately kept as an old-style (K&R) definition with an implicit int
 * return type, since the listings that follow were generated from this form.
 * argc and argv are unused.  The literal matches the "Hello, world!\n"
 * string that the generated assembly (tmp.s) emits as L_.str.
 */
main(argc, argv)
int argc; char * * argv;
{
    printf("Hello, world!\n");
}
nm output:
hydrogen:tmp phyrrus9$ nm a.out
0000000100000000 T __mh_execute_header
0000000100000f40 T _main
U _printf
U dyld_stub_binder
otool output:
hydrogen:tmp phyrrus9$ otool -tv a.out
a.out:
(__TEXT,__text) section
_main:
0000000100000f40 pushq %rbp
0000000100000f41 movq %rsp, %rbp
0000000100000f44 subq $0x10, %rsp
0000000100000f48 leaq 0x37(%rip), %rdi ; this is our string
0000000100000f4f movb $0x0, %al
0000000100000f51 callq 0x100000f66 ; call printf
0000000100000f56 movl $0x0, %ecx
0000000100000f5b movl %eax, 0xfffffffffffffffc(%rbp)
0000000100000f5e movl %ecx, %eax
0000000100000f60 addq $0x10, %rsp
0000000100000f64 popq %rbp
0000000100000f65 ret
hexdump output not shown.
Actual assembly:
hydrogen:tmp phyrrus9$ cat tmp.s
## Assembly emitted by the compiler for the hello-world main
## (AT&T syntax, Mach-O section names).  The .cfi_* directives only
## describe the stack frame for unwinding.
.section __TEXT,__text,regular,pure_instructions
.globl _main
.align 4, 0x90
_main: ## @main
.cfi_startproc
## BB#0:
pushq %rbp ## save the caller's frame pointer
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp ## establish this function's frame pointer
Ltmp4:
.cfi_def_cfa_register %rbp
subq $16, %rsp ## reserve 16 bytes of stack
leaq L_.str(%rip), %rdi ## rdi = address of the string (RIP-relative), first argument
movb $0, %al ## al = 0: no vector-register arguments for the variadic call
callq _printf
movl $0, %ecx ## ecx = 0, moved into eax below as main's return value
movl %eax, -4(%rbp) ## 4-byte Spill
movl %ecx, %eax
addq $16, %rsp ## release the 16 bytes reserved above
popq %rbp
ret
.cfi_endproc
.section __TEXT,__cstring,cstring_literals
L_.str: ## @.str
.asciz "Hello, world!\n"
.subsections_via_symbols
Hope this helps you get a grasp.

Related

Decrementing stack by 24 when only 8 bytes are needed?

I have the C code:
/*
 * Recursive Fibonacci-style function: returns 1 for n < 2, otherwise
 * fib(n-1) + fib(n-2).  The result of the first call has to be kept
 * while the second call runs, because the two are added afterwards.
 */
long fib(long n) {
if (n < 2) return 1;
return fib(n-1) + fib(n-2);
}
/* main ignores its arguments, never calls fib, and returns 0. */
int main(int argc, char** argv) {
return 0;
}
which I compiled by running gcc -O0 -fno-optimize-sibling-calls -S file.c yielding assembly code that has not been optimized:
# gcc -O0 output for long.c (x86-64, AT&T syntax, ELF directives).
# ELF type operands are written with '@' (@function, @progbits): on x86
# GAS treats '#' as the start of a comment, so "#function" would leave
# the .type directive without its type operand.
    .file   "long.c"
    .text
    .globl  fib
    .type   fib, @function
# long fib(long n)
#   In:    rdi = n
#   Out:   rax = 1 if n < 2, else fib(n-1) + fib(n-2)
#   Frame: -8(%rbp) = saved rbx, -24(%rbp) = n
fib:
.LFB5:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %rbx                    # callee-saved; holds fib(n-1) across the second call
    subq    $24, %rsp               # rsp = rbp - 32 (8 for rbx + 24)
    .cfi_offset 3, -24
    movq    %rdi, -24(%rbp)         # spill n
    cmpq    $1, -24(%rbp)
    jg      .L2                     # n > 1 (signed): recursive case
    movl    $1, %eax                # n < 2: return 1
    jmp     .L3
.L2:
    movq    -24(%rbp), %rax
    subq    $1, %rax
    movq    %rax, %rdi              # argument = n - 1
    call    fib
    movq    %rax, %rbx              # rbx = fib(n-1)
    movq    -24(%rbp), %rax
    subq    $2, %rax
    movq    %rax, %rdi              # argument = n - 2
    call    fib
    addq    %rbx, %rax              # rax = fib(n-2) + fib(n-1)
.L3:
    addq    $24, %rsp               # undo the subq above
    popq    %rbx
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE5:
    .size   fib, .-fib
    .globl  main
    .type   main, @function
# int main(int argc, char **argv)
#   In:  edi = argc, rsi = argv (both only stored, never read back)
#   Out: eax = 0
#   rsp is not adjusted: the two stores go below rsp and main makes no calls.
main:
.LFB6:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movl    %edi, -4(%rbp)          # argc
    movq    %rsi, -16(%rbp)         # argv
    movl    $0, %eax                # return 0
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE6:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
    .section    .note.GNU-stack,"",@progbits
My question is:
Why do we decrement the stack pointer by 24, subq $24, %rsp? As I see it, we store one element only, first argument n in %rdi, on the stack after the initial two pushes. So why don't we just decrement the stack pointer by 8 and then move n to -8(%rbp)? So
subq $8, %rsp
movq %rdi, -8(%rbp)
GCC does not fully optimize with -O0, not even its stack use. (This may aid in debugging by making some of its use of the stack more transparent to humans. For example, objects a, b, and c may share a single stack location with -O3 if their active lifetimes (defined by uses in the program, not by the model of lifetime in the C standard) do not overlap, but may have separately reserved places in the stack with -O0, and that makes it easier for a human to see where a, b, and c are used in the assembly code. The wasted 16 bytes may be a side effect of this, as those spaces may be reserved for some purpose that this small function did not happen to use, such as space to save certain registers if needed.)
Changing optimization to -O3 results in GCC subtracting only eight from the stack pointer.

Can't change return address of the function x64

I am trying to change the return address of some function in C for skipping one instruction. I am doing this on virtual machine with Ubuntu Server (because on Mac gcc doesn't let me turn off stack protection).
I am compiling my code with gcc:
gcc -g -fno-stack-protector -z execstack -o bufover bufover.c
This is the code:
/*
 * Tries to advance its own return address by 5 bytes so that the caller
 * resumes past the instruction that follows the call.
 *
 * The pointer arithmetic assumes the frame is laid out as
 * [buff (256 bytes)][saved %rbp (8 bytes)][return address].  That layout
 * is chosen by the compiler, not by the C language.
 * NOTE(review): in the clang listing shown with this question, buff sits at
 * -272(%rbp), so buff + 264 is -8(%rbp), which is inside foo's own frame
 * (where b and a are stored), not the return address at 8(%rbp).
 */
void foo(int a, int b, int c) {
char buff[256];
long *ret, *ret2; /* ret2 is never used */
ret = buff + 256 + 8; /* intended: address of the return address */
(*ret) += 5; /* add 5 to whatever 8-byte value lives there */
}
/*
 * If foo's patch worked as intended, the assignment x = '1' would be
 * skipped and '0' would be printed.
 */
int main() {
char x;
x = '0';
foo(1,2,3);
x = '1';
printf("%c\n",x);
}
To the address of buff I added 256 (the size of buff) and 8 (the size of the saved %RBP). The return address should be on the stack right after that.
Next I added 5 bytes to that return address, because I checked using gdb that the next instruction is 5 bytes further on.
But it doesn't work...
I am analyzing the variables (addresses) step by step using gdb but I am not seeing any mistake.
Any idea?
Edit: Assembly code:
## clang output for me.c (x86-64, AT&T syntax, Mach-O).  The .loc and
## .cfi_* lines carry debug and unwind information only.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 14
.globl _foo ## -- Begin function foo
.p2align 4, 0x90
_foo: ## @foo
Lfunc_begin0:
.file 1 "me.c"
.loc 1 3 0 ## me.c:3:0
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp ## from here on: 0(%rbp) = saved rbp, 8(%rbp) = return address
.cfi_def_cfa_register %rbp
subq $160, %rsp
leaq -272(%rbp), %rax ## rax = buff; its 256 bytes span -272(%rbp) .. -17(%rbp)
movl %edi, -4(%rbp) ## a
movl %esi, -8(%rbp) ## b
movl %edx, -12(%rbp) ## c
Ltmp0:
##DEBUG_VALUE: foo:buff <- [%rax+0]
.loc 1 7 19 prologue_end ## me.c:7:19
addq $256, %rax ## imm = 0x100
Ltmp1:
.loc 1 7 25 is_stmt 0 ## me.c:7:25
addq $8, %rax ## rax = buff + 264 = address of -8(%rbp), not of 8(%rbp)
.loc 1 7 12 ## me.c:7:12
movq %rax, -280(%rbp) ## ret = rax
.loc 1 8 10 is_stmt 1 ## me.c:8:10
movq -280(%rbp), %rax
.loc 1 8 15 is_stmt 0 ## me.c:8:15
movq (%rax), %rcx
addq $5, %rcx
movq %rcx, (%rax) ## (*ret) += 5: rewrites the 8 bytes at -8(%rbp), i.e. b and a
.loc 1 9 5 is_stmt 1 ## me.c:9:5
addq $160, %rsp
popq %rbp
retq ## returns through the unmodified return address
Ltmp2:
Lfunc_end0:
.cfi_endproc
## -- End function
## main from me.c: x is kept in the single byte at -1(%rbp).
.globl _main ## -- Begin function main
.p2align 4, 0x90
_main: ## @main
Lfunc_begin1:
.loc 1 11 0 ## me.c:11:0
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movl $1, %edi ## arguments for foo(1, 2, 3)
movl $2, %esi
movl $3, %edx
Ltmp3:
.loc 1 13 9 prologue_end ## me.c:13:9
movb $48, -1(%rbp) ## x = '0'
.loc 1 14 7 ## me.c:14:7
callq _foo
leaq L_.str(%rip), %rdi ## first instruction after the call: loads the format string, not the store to x
.loc 1 15 9 ## me.c:15:9
movb $49, -1(%rbp) ## x = '1' (the store the question wants to skip)
.loc 1 16 21 ## me.c:16:21
movsbl -1(%rbp), %esi ## second printf argument: x sign-extended to int
.loc 1 16 7 is_stmt 0 ## me.c:16:7
movb $0, %al ## no vector-register arguments for the variadic call
callq _printf
xorl %edx, %edx ## edx = 0, moved into eax below as the return value
.loc 1 17 5 is_stmt 1 ## me.c:17:5
movl %eax, -8(%rbp) ## 4-byte Spill
movl %edx, %eax
addq $16, %rsp
popq %rbp
retq
Ltmp4:
Lfunc_end1:
.cfi_endproc
## -- End function
.section __TEXT,__cstring,cstring_literals
L_.str: ## @.str
.asciz "%c\n"
.section __DWARF,__debug_str,regular,debug
You should use GCC's return-address-related builtins such as __builtin_frame_address or __builtin_return_address, and you should carefully study the x86-64 ABI specification to understand the relevant x86 calling conventions in detail.
Try also to understand them by writing some C code in foo.c, and compiling it with gcc -O -fverbose-asm -S foo.c then looking into the generated foo.s
Finally, the stack segment is usually not executable (this could matter for some trampoline techniques). Read about the NX bit. On Linux, learn to use mprotect(2), mmap(2), backtrace(3).
There is no guarantee that GCC even uses any call stack. It could optimize to avoid using it (tail-call optimization could happen sometimes), and your code might not even need additional call frames. So of course you cannot achieve your goal in standard C, nor without additional assumptions about your particular GCC compiler (and GCC 8 and GCC 9 could optimize differently).
Of course, changing the return address is undefined behavior.

Testing C code by compiling with -s switch? [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 6 years ago.
Improve this question
I want to test my code (I know my code is still incomplete -- yes I am planning to complete it before I compile it) to see if it gives the correct assembly code by compiling with -s switch, how do I do this?
I am not very familiar with compiling. All I did so far was save my file. Now I need to compile it to be able to run it.
/* The five modes that switch3 dispatches on. */
typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;
/*
 * Skeleton of a switch over `action`.  Every case label is still empty,
 * so each one falls through to the empty default and the function
 * returns result unchanged (0).  p1 and p2 are not used yet.
 */
long switch3 (long *p1, long *p2, mode_t action) {
long result = 0;
switch(action){
case MODE_A:
case MODE_B:
case MODE_C:
case MODE_D:
case MODE_E:
default:; // don't forget the colon
}
return result;
}
Open an editor, Vi or Emacs for example
Type and save your code in a file, maybe main.c
Exit the editor
Type gcc -S main.c or clang -S main.c in the terminal. You can also add the -fverbose-asm flag to tell the compiler to add more information to the output, or the -masm=intel flag to get the assembly in Intel syntax, which many people find easier to read.
On success, a file named main.s will be generated under the current directory, containing the assembly code; on failure, error messages will be printed on the screen.
Also note that your C code will only be compiled when it's compilable, so you have to modify your code first. At least, change default; to default:;
Here is the assembly code produced by clang -S main.c on my machine:
## clang -S output for switch3 (x86-64, AT&T syntax, Mach-O).
## rsp is never adjusted: the locals are stored below rsp and the
## function makes no calls.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _switch3
.align 4, 0x90
_switch3: ## @switch3
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp) ## p1
movq %rsi, -16(%rbp) ## p2
movl %edx, -20(%rbp) ## action
movq $0, -32(%rbp) ## result = 0
movl -20(%rbp), %edx
subl $4, %edx ## sets the flags for action compared with 4
movl %edx, -36(%rbp) ## 4-byte Spill
ja LBB0_2 ## action > 4 (unsigned): default case
jmp LBB0_1
LBB0_1:
jmp LBB0_2 ## MODE_A..MODE_E: the cases are empty and fall through
LBB0_2:
jmp LBB0_3
LBB0_3:
movq -32(%rbp), %rax ## return result
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
To compile without linking using GNU Compiler Collection (gcc) you can use the -S switch:
jan@jsn-dev:~/src/so> gcc -S main.c
main.c: In function ‘switch3’:
main.c:11:12: error: expected ‘:’ before ‘;’ token
default;
^
After correcting your code with the suggested fix, you get:
jan@jsn-dev:~/src/so> gcc -S main.c
jan@jsn-dev:~/src/so> cat main.s
# gcc -S output for main.c (x86-64, AT&T syntax, ELF directives).
# ELF type operands are written with '@' (@function, @progbits): on x86
# GAS treats '#' as the start of a comment, so "#function" would leave
# the .type directive without its type operand.
    .file   "main.c"
    .text
    .globl  switch3
    .type   switch3, @function
# long switch3(long *p1, long *p2, mode_t action)
#   In:  rdi = p1, rsi = p2, edx = action (all three only stored)
#   Out: rax = result, which is always 0
#   rsp is not adjusted: the locals are stored below rsp and the
#   function makes no calls.
switch3:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movq    %rdi, -24(%rbp)         # p1
    movq    %rsi, -32(%rbp)         # p2
    movl    %edx, -36(%rbp)         # action
    movq    $0, -8(%rbp)            # result = 0
    movq    -8(%rbp), %rax          # return result; the empty switch produced no code
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   switch3, .-switch3
    .ident  "GCC: (SUSE Linux) 4.8.3 20140627 [gcc-4_8-branch revision 212064]"
    .section    .note.GNU-stack,"",@progbits

Incrementing a variable through embedded assembly language

I am trying to understand how to embed assembly language in C (using gcc on x86_64 architecture). I wrote this program to increment the value of a single variable. But I am getting a garbage value as output. Any ideas why?
#include <stdio.h>
/* Sets x to 4, tries to increment it with inline assembly, and prints it. */
int main(void) {
int x;
x = 4;
/*
 * The template only names %0, the output operand ("=r", write-only).
 * The current value of x is supplied as a separate input operand, so the
 * compiler may put it in a different register than %0.  When it does,
 * incl increments a register that never held x.
 */
asm("incl %0": "=r"(x): "r0"(x));
printf("%d", x);
return 0;
}
Thanks
Update The program is giving expected result on gcc 4.8.3 but not on gcc 4.6.3. I am pasting the assembly output of the non-working code:
# gcc 4.6.3 output for abc.c (x86-64, AT&T syntax, ELF directives),
# reproduced as evidence of the wrong result: the instructions are left
# exactly as the compiler emitted them.
# ELF type operands are written with '@' (@function, @progbits): on x86
# GAS treats '#' as the start of a comment, so "#function" would leave
# the .type directive without its type operand.
    .file   "abc.c"
    .section    .rodata
.LC0:
    .string "%d"
    .text
    .globl  main
    .type   main, @function
# int main(void)
#   Frame: -8(%rbp) = saved rbx, -20(%rbp) = x
#   Out:   eax = 0
main:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %rbx
    subq    $24, %rsp
    movl    $4, -20(%rbp)           # x = 4
    movl    -20(%rbp), %eax         # input operand: x is loaded into eax
    incl    %edx                    # asm body "incl %0": the output was given edx, which never received x
    movl    %edx, %ebx
    .cfi_offset 3, -24
    movl    %ebx, -20(%rbp)         # x = (old contents of edx) + 1, hence the garbage value
    movl    $.LC0, %eax
    movl    -20(%rbp), %edx
    movl    %edx, %esi              # second argument: x
    movq    %rax, %rdi              # first argument: the "%d" format string
    movl    $0, %eax                # al = 0: no vector-register arguments for the variadic call
    call    printf
    movl    $0, %eax                # return 0
    addq    $24, %rsp
    popq    %rbx
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    .section    .note.GNU-stack,"",@progbits
You don't need to say x twice; once is sufficient:
asm("incl %0": "+r"(x));
The +r says that the value will be input and output.
Your way, with separate inputs and output registers, requires that you take the input from %1, add one, and write the output to %0, but you can't do that with incl.
The reason it works on some compilers is because GCC is free to allocate both %0 and %1 to the same register, and appears to have done so in those cases, but it does not have to. Incidentally, if you want to prevent GCC allocating an input and output to the same register (say, if you want to initialize the output before using the input to calculate a final output), you need to use the & modifier.
The documentation for the modifiers is here.

Why GCC didn't optimize this tail call?

I have code that works with linked lists. I use tail calls. Unfortunately, GCC does not optimise the calls.
Here is C code of the function that recursively calculates length of the linked list:
/* Returns the number of nodes in list by starting the recursive helper at 0. */
size_t ll_length(const ll_t* list) {
return ll_length_rec(list, 0);
}
/*
 * Accumulator-style helper: size_so_far carries the count of nodes already
 * visited.  The recursive call is the last thing the function does (its
 * result is returned unchanged), which is what makes it a tail call.
 */
size_t ll_length_rec(const ll_t* list, size_t size_so_far)
{
if (list) {
return ll_length_rec(list->next, size_so_far + 1);
} else {
/* end of the list: the accumulator is the length */
return size_so_far;
}
}
and here is the assembler code:
.globl _ll_length_rec
_ll_length_rec:
LFB8:
.loc 1 47 0
pushq %rbp
LCFI6:
movq %rsp, %rbp
LCFI7:
subq $32, %rsp
LCFI8:
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
.loc 1 48 0
cmpq $0, -8(%rbp)
je L8
.loc 1 49 0
movq -16(%rbp), %rsi
incq %rsi
movq -8(%rbp), %rax
movq 8(%rax), %rdi
call _ll_length_rec # < THIS SHOUD BE OPTIMIZED
movq %rax, -24(%rbp)
jmp L10
If GCC would optimize it, there would be no call in the asm. I compile it with:
gcc -S -fnested-functions -foptimize-sibling-calls \
-03 -g -Wall -o llist llist.c
and GCC version is:
i686-apple-darwin10-gcc-4.2.1 (GCC) 4.2.1 (Apple Inc. build 5666) (dot 3)
If I add -O3 to your compilation line, it does not seem to generate the offending call, while without it, I get the unoptimised call. I don't know all gcc options in my head, but is -03 a typo for -O3 or intentional?
Ltmp2:
pushq %rbp
Ltmp0:
movq %rsp, %rbp
Ltmp1:
jmp LBB1_1
.align 4, 0x90
LBB1_3:
addq $2, %rsi
Ltmp3:
movq (%rax), %rdi
Ltmp4:
LBB1_1:
Ltmp5:
testq %rdi, %rdi
je LBB1_5
Ltmp6:
movq (%rdi), %rax
testq %rax, %rax
jne LBB1_3
incq %rsi
LBB1_5:
movq %rsi, %rax
Ltmp7:
Ltmp8:
popq %rbp
ret
Most likely because neither of your functions are declared as static, which means that the symbols must be visible to the linker in case any other compilation units need them at link time. Try to compile with the -fwhole-program flag and see what happens.
Probably depends on the version of GCC and specific build. This is what I get from GCC 3.4.4 on Windows starting from -O2 and up
# 32-bit x86 output (AT&T syntax).  There is no call instruction here:
# the recursion has been turned into the loop formed by L6 and L3.
.globl _ll_length_rec
.def _ll_length_rec; .scl 2; .type 32; .endef
_ll_length_rec:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %edx # first stack argument (presumably list)
movl 12(%ebp), %eax # second stack argument (presumably size_so_far); eax is also the return value
jmp L3 # enter the loop at its test
.p2align 4,,7
L6:
movl (%edx), %edx # follow the pointer at offset 0 of the node (presumably list->next in this build)
incl %eax # count the node just visited
L3:
testl %edx, %edx
jne L6 # keep looping while the pointer is non-null
popl %ebp
ret

Resources