Related
I'm doing an x86 assembly project for class and we're supposed to implement a heap of personnel records. The call heap_swap line is giving me trouble. If I uncomment it, it throws a seg fault. However, the heap_swap function works fine no matter how I test it. I've really racked my brain and would appreciate any help anyone can give!
sift_up1:
# Sift heap[i] up toward the root: loop while i != 0 and
# heap[i].id_number > heap[parent].id_number, then continue from the parent.
# EXIT_LOOP and the function epilogue lie outside this excerpt.
#
# Register roles in the code below:
# ecx  = i (read at LOOP_TOP but never initialised here, so presumably the
#        caller supplies it in ecx -- TODO confirm)
# rdx  = address of the `heap` variable; (%rdx) is used as the array base
# r9   = (%rdx) + 8*i, the slot for heap[i]
# rax  = address of `offset_of_id`; (%rax) is the id offset that gets added
# r8   = address of heap[i].id_number
# r10d = heap[i].id_number
# r11d = index of parent, (i - 1) >> 1
# rdi  = address of heap[parent].id_number
# ebx  = heap[parent].id_number
pushq %rbp
movq %rsp, %rbp
subq $32, %rsp
pushq %rbx # rbx is callee-saved under the SysV ABI and is overwritten below
pushq %rdi # rdi is caller-saved under SysV; saving it is not required by the ABI
leaq offset_of_id(%rip), %rax #rax = address of offset_of_id (the value is read via (%rax))
leaq heap(%rip), %rdx #rdx = address of the heap variable
jmp LOOP_TOP # jumps to the very next line; falling through would be equivalent
LOOP_TOP:
cmpl $0, %ecx #Check if i=0, if so jump to exit loop
je EXIT_LOOP
# NOTE(review): 8 is a hard-coded element stride. If heap[] holds pointers to
# records rather than 8-byte records, r9 would have to be dereferenced before
# the id offset is added -- confirm against the record layout.
movl $8, %r9d
imull %ecx, %r9d #r9d = 8 * i (32-bit multiply; upper half of r9 is zeroed)
addq (%rdx), %r9 #r9 = array base + 8 * i
movq %r9, %r8 #r8 = address of heap[i]
addq (%rax), %r8 #add id offset, it becomes the address of heap[i].id_number
movl (%r8), %r10d #dereference id_number and place it into r10d
movl %ecx, %r11d #find the index of the parent of i
subl $1, %r11d
shrl $1, %r11d #r11d = (i - 1) >> 1; i != 0 here, so i - 1 does not wrap
movl $8, %edi
imull %r11d, %edi #edi = 8 * parent
addq (%rdx), %rdi #rdi holds the address of heap[parent]
addq (%rax), %rdi #rdi holds the address of heap[parent].id_number
movl (%rdi), %ebx #ebx holds the heap[parent].id_number
cmpl %ebx, %r10d #flags from heap[i].id - heap[parent].id (signed)
jle EXIT_LOOP #child <= parent: nothing to swap, stop
pushq %rdx #keep the heap address across the (disabled) call
movq %r11, %rdx #parent index goes into rdx as an argument
# NOTE(review) on the disabled call below -- heap_swap is not shown here, so
# each point needs confirming against it:
#  * Only rdx (parent index) is set up; ecx still holds i. SysV would pass
#    the first two integer arguments in rdi/rsi.
#  * rax, rcx and r11 are caller-saved under SysV. If heap_swap overwrites
#    them, the code after the call (movl %r11d, %ecx, and addq (%rax), ...
#    on the next pass) would use clobbered values.
#  * Counting from a 16-byte-aligned call into sift_up1, the pushes above
#    leave rsp at 8 mod 16 here, not the 16-byte alignment SysV expects at
#    a call site.
# call heap_swap #call heap_swap
popq %rdx
movl %r11d, %ecx #modify i: continue from the parent
jmp LOOP_TOP #jump to loop top
I am a huge fan of network protocols and libnet, which is why I've been trying to imitate some network protocols that are not included by libnet. Capturing packets, imitating headers etc works so far. Now I need a way to actually write these exact packets to my network card. I've tried libnet_adv_write_rawipv4() and -link(), both won't work. I can't cull the headers with libnet_adv_cull_header() because of the stupid errors and bugs. So I figured, that the problem could be solved with a little assembly: get the assembly code for the actual libnet_build() and libnet_write() call, alter some bytes and voila: raw bytes get written to the network card. So I have written a dummy program:
#include <stdio.h>
#include <stdlib.h>
#include <libnet.h>
int main() {
    /*
     * Open a raw-IPv4 libnet context, queue a TCP header with a 5-byte
     * payload, queue the IPv4 header in front of it, and write the packet.
     * No return value is checked and the context is never released.
     * Argument roles noted below follow libnet's documented prototypes.
     */
    libnet_t *l = libnet_init(LIBNET_RAW4, 0, NULL);

    libnet_build_tcp(
        2000,                /* source port */
        450,                 /* destination port */
        0,                   /* sequence number */
        1234,                /* acknowledgement number */
        TH_SYN,              /* control flags */
        254,                 /* window */
        0,                   /* checksum */
        NULL,                /* urgent pointer (NULL used as 0) */
        LIBNET_TCP_H + 5,    /* TCP header plus payload length */
        "aaaaa",             /* payload */
        5,                   /* payload size */
        l,
        0);                  /* ptag 0 */

    libnet_build_ipv4(
        LIBNET_TCP_H + LIBNET_IPV4_H + 5,   /* total packet length */
        0,                   /* TOS */
        1,                   /* IP id */
        0,                   /* fragmentation bits/offset */
        64,                  /* TTL */
        6,                   /* upper-layer protocol number */
        0,                   /* checksum */
        2186848448,          /* source address (does not fit in a signed int) */
        22587584,            /* destination address */
        NULL,                /* no payload */
        0,                   /* payload size */
        l,
        0);                  /* ptag 0 */

    libnet_write(l);
    return 0;
}
Works so far. Now I got the assembly version of the program using
gcc -o program program.c -S
And this is where the actual problem starts:
.LC0:
.string "aaaaa"
.text
.globl main
.type main, #function
main:
# Compiler output for main(). The only local is the libnet_t* handle, kept at
# -8(%rbp). The first six integer arguments of each call go in
# edi, esi, edx, ecx, r8d, r9d; the remaining ones are pushed last-to-first.
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl $0, %edx # libnet_init arg 3: NULL
movl $0, %esi # libnet_init arg 2: 0
movl $1, %edi # libnet_init arg 1: LIBNET_RAW4 (emitted as 1)
call libnet_init
movq %rax, -8(%rbp) # l = returned handle
subq $8, %rsp # 8 bytes of padding + 7 pushes = 64 bytes, released after the call
pushq $0 # libnet_build_tcp arg 13: 0
pushq -8(%rbp) # arg 12: l
pushq $5 # arg 11: 5
pushq $.LC0 # arg 10: address of "aaaaa"
pushq $25 # arg 9: LIBNET_TCP_H + 5, folded to 25
pushq $0 # arg 8: NULL
pushq $0 # arg 7: 0
movl $254, %r9d # arg 6: 254
movl $2, %r8d # arg 5: TH_SYN (emitted as 2)
movl $1234, %ecx # arg 4: 1234
movl $0, %edx # arg 3: 0
movl $450, %esi # arg 2: 450
movl $2000, %edi # arg 1: 2000
call libnet_build_tcp
addq $64, %rsp # drop the stack arguments and padding
subq $8, %rsp # same 64-byte layout for the next call
pushq $0 # libnet_build_ipv4 arg 13: 0
pushq -8(%rbp) # arg 12: l
pushq $0 # arg 11: 0
pushq $0 # arg 10: NULL
pushq $22587584 # arg 9: 22587584
pushq $-2108118848 # arg 8: 2186848448 written as a signed 32-bit immediate
pushq $0 # arg 7: 0
movl $6, %r9d # arg 6: 6
movl $64, %r8d # arg 5: 64
movl $0, %ecx # arg 4: 0
movl $1, %edx # arg 3: 1
movl $0, %esi # arg 2: 0
movl $45, %edi # arg 1: LIBNET_TCP_H + LIBNET_IPV4_H + 5, folded to 45
call libnet_build_ipv4
addq $64, %rsp
movq -8(%rbp), %rax
movq %rax, %rdi # libnet_write arg 1: l
call libnet_write
movl $0, %eax # return 0
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
See this?
call libnet_build_ipv4
I can't copy the assembly code of these build() or write() calls, because all there is is a reference to them. So where would I find the assembly code for these pre-written functions declared in libnet-functions.h (libnet_build_ipv4(), libnet_build_tcp(), libnet_write())?
GDB is your friend in situations like this. You don't say anything about what platform you're on, the following example works on Ubuntu, but should work similarly on other distributions.
First, make sure that you have debug-symbols for libnet installed:
sudo apt install libnet1-dbg
Find out where libnet is installed:
~$ dpkg -L libnet1 | grep \.so
/usr/lib/x86_64-linux-gnu/libnet.so.1.7.0
/usr/lib/x86_64-linux-gnu/libnet.so.1
Open it (or your own application) with GDB:
~$ gdb /usr/lib/x86_64-linux-gnu/libnet.so.1.7.0
Reading symbols from /usr/lib/x86_64-linux-gnu/libnet.so.1.7.0...Reading symbols from /usr/lib/debug//usr/lib/x86_64-linux-gnu/libnet.so.1.7.0...done.
done.
Use the disassemble command to inspect anything you like:
(gdb) disassemble libnet_build_ipv4
Dump of assembler code for function libnet_build_ipv4:
0x0000000000007d60 <+0>: push %r15
0x0000000000007d62 <+2>: push %r14
0x0000000000007d64 <+4>: push %r13
0x0000000000007d66 <+6>: push %r12
0x0000000000007d68 <+8>: push %rbp
0x0000000000007d69 <+9>: push %rbx
0x0000000000007d6a <+10>: sub $0x48,%rsp
0x0000000000007d6e <+14>: mov 0xa8(%rsp),%rbx
0x0000000000007d76 <+22>: mov %edx,0x8(%rsp)
0x0000000000007d7a <+26>: mov %fs:0x28,%rax
0x0000000000007d83 <+35>: mov %rax,0x38(%rsp)
0x0000000000007d88 <+40>: xor %eax,%eax
0x0000000000007d8a <+42>: mov %ecx,0x14(%rsp)
0x0000000000007d8e <+46>: mov 0x80(%rsp),%r14d
0x0000000000007d96 <+54>: test %rbx,%rbx
0x0000000000007d99 <+57>: mov 0x98(%rsp),%r15
0x0000000000007da1 <+65>: je 0x810a <libnet_build_ipv4+938>
0x0000000000007da7 <+71>: mov %esi,%r13d
0x0000000000007daa <+74>: mov 0xb0(%rsp),%esi
0x0000000000007db1 <+81>: mov %edi,%ebp
0x0000000000007db3 <+83>: mov $0xd,%ecx
0x0000000000007db8 <+88>: mov $0x14,%edx
0x0000000000007dbd <+93>: mov %rbx,%rdi
0x0000000000007dc0 <+96>: mov %r9d,0x1c(%rsp)
0x0000000000007dc5 <+101>: mov %r8d,0x18(%rsp)
0x0000000000007dca <+106>: callq 0xea10 <libnet_pblock_probe>
0x0000000000007dcf <+111>: test %rax,%rax
---Type <return> to continue, or q <return> to quit---q
Quit
(gdb)
I encountered the following code in my computer architecture class:
void mystery( long A[], long B[], long n )
{
    /* Reverse copy: B[0] receives A[n-1], B[1] receives A[n-2], and so on
       up to B[n-1] receiving A[0]. Nothing is touched when n <= 0. */
    long i = 0;

    while ( i < n ) {
        B[i] = A[n - (i + 1)];
        ++i;
    }
}
And my professor showed the corresponding assembly code GCC generates on an Ubuntu machine and he seems to be confused as well:
mystery:
# Unoptimised code for mystery(A, B, n): every variable is kept in memory and
# reloaded for each use. Slots, all addressed below rbp:
#   -8(%rbp) = i    -24(%rbp) = A    -32(%rbp) = B    -40(%rbp) = n
#   -16(%rbp) is never read or written.
# rsp is never lowered: this function makes no calls, and under the SysV AMD64
# ABI a leaf function may use the 128-byte red zone below rsp as scratch space.
pushq %rbp
movq %rsp, %rbp
movq %rdi, -24(%rbp) # spill A
movq %rsi, -32(%rbp) # spill B
movq %rdx, -40(%rbp) # spill n
movq $0, -8(%rbp) # i = 0
jmp .L2 # test the loop condition first
.L3:
movq -8(%rbp), %rax # rax = i
leaq 0(,%rax,8), %rdx # rdx = i * 8
movq -32(%rbp), %rax # rax = B
addq %rax, %rdx # rdx = &B[i]
movq -8(%rbp), %rax # rax = i
notq %rax # rax = ~i, which equals -(i + 1)
movq %rax, %rcx
movq -40(%rbp), %rax # rax = n
addq %rcx, %rax # rax = n - (i + 1)
leaq 0(,%rax,8), %rcx # rcx = (n - (i + 1)) * 8
movq -24(%rbp), %rax # rax = A
addq %rcx, %rax # rax = &A[n - (i + 1)]
movq (%rax), %rax # load A[n - (i + 1)]
movq %rax, (%rdx) # B[i] = A[n - (i + 1)]
addq $1, -8(%rbp) # i++
.L2:
movq -8(%rbp), %rax
cmpq -40(%rbp), %rax # flags from i - n
jl .L3 # loop while i < n (signed)
popq %rbp
ret
But I can't understand why the compiler will generate this code. It appears the A, B, and n are pushed onto the stack but the stack pointer %rsp doesn't change its value. Also, -16(%rbp) also seems to be allocated but is never put in a value. Is there any reason GCC will behave this way?
Compiler Explorer (godbolt.org) is a great tool to look at generated assembly from various compilers and with different flags. Here's what g++7 -O2 produces for your code:
mystery(long*, long*, long):
# Intel syntax (destination first). rdi = A, rsi = B, rdx = n, following the
# SysV AMD64 integer argument order. No stack is used.
test rdx, rdx
jle .L1 # n <= 0: nothing to copy
lea rax, [rdi-8+rdx*8] # rax = &A[n-1], the read cursor
sub rdi, 8 # rdi = &A[-1], the address at which the read cursor stops
.L3:
mov rdx, QWORD PTR [rax] # load the current element of A
sub rax, 8 # move the read cursor down one element
add rsi, 8 # move the write cursor up one element
mov QWORD PTR [rsi-8], rdx # store into the B slot just stepped past
cmp rax, rdi
jne .L3 # repeat until the read cursor reaches &A[-1]
.L1:
rep ret
To answer your question: compiling with optimizations disabled usually produces unexpected or less sensible output. "Why?" is a difficult question to answer, as it depends heavily on how the compiler is implemented.
Here's a screenshot showing a comparison of -O2, -O0 and -Ofast:
Try it out here: https://godbolt.org/g/pQ637a
I'm trying to create a green thread implementation based on this tutorial. However, my switch function is giving me a segfault because the code to load the registers is not run at the end of the function. Here is my code:
// Switch execution to thread `in` by loading its saved stack pointer and
// callee-saved registers (rsp, r15, r14, r13, r12, rbx, rbp) from in->cpu.
// Saving the state of `out` is still a placeholder. Returns immediately when
// both arguments are null.
// NOTE(review): `Thread` is used with `->` and `!`, so it is presumably a
// pointer typedef -- confirm against its declaration.
void ThreadSwitch(Thread in, Thread out) {
if (!out && !in) {
return;
}
if (out) {
// save registers for out
}
if (in) {
SetCurrentThread(in);
// Copy the saved register values into locals while holding in->mutex; they
// are loaded into the real registers only after the mutex is released.
mtx_lock(&in->mutex);
uint64_t rsp = in->cpu.rsp;
uint64_t r15 = in->cpu.r15;
uint64_t r14 = in->cpu.r14;
uint64_t r13 = in->cpu.r13;
uint64_t r12 = in->cpu.r12;
uint64_t rbx = in->cpu.rbx;
uint64_t rbp = in->cpu.rbp;
mtx_unlock(&in->mutex);
// NOTE(review): the asm statement below declares inputs only -- no outputs
// and no clobber list -- although it overwrites rsp, rbp, rbx and r12-r15.
// Nothing here tells the compiler those registers changed, and nothing stops
// it from placing an input operand in a register that an earlier mov in the
// sequence has already overwritten. Any compiler-generated code that follows
// and addresses locals through rsp/rbp will be using the new thread's stack.
asm volatile("mov %[rsp], %%rsp\n"
"mov %[r15], %%r15\n"
"mov %[r14], %%r14\n"
"mov %[r13], %%r13\n"
"mov %[r12], %%r12\n"
"mov %[rbx], %%rbx\n"
"mov %[rbp], %%rbp\n" : : [rsp] "r"(rsp), [r15] "r"(r15), [r14] "r"(r14), [r13] "r"(r13), [r12] "r"(r12), [rbx] "r"(rbx), [rbp] "r"(rbp));
}
}
Xcode says that the inline assembly is causing a segfault, but my lldb disassembly looks like this (you can ignore 95% of it, just provided for context):
0x1000f88b4: movq -0x8(%rbp), %rdi
0x1000f88b8: callq 0x1000f83a0 ; SetCurrentThread at thread.cc:21
0x1000f88bd: movq -0x8(%rbp), %rdi
0x1000f88c1: addq $0x50, %rdi
0x1000f88c8: callq 0x1000f7b80 ; mtx_lock at tct.c:106
0x1000f88cd: movq -0x8(%rbp), %rdi
0x1000f88d1: movq (%rdi), %rdi
0x1000f88d4: movq %rdi, -0x18(%rbp)
0x1000f88d8: movq -0x8(%rbp), %rdi
0x1000f88dc: movq 0x8(%rdi), %rdi
0x1000f88e0: movq %rdi, -0x20(%rbp)
0x1000f88e4: movq -0x8(%rbp), %rdi
0x1000f88e8: movq 0x10(%rdi), %rdi
0x1000f88ec: movq %rdi, -0x28(%rbp)
0x1000f88f0: movq -0x8(%rbp), %rdi
0x1000f88f4: movq 0x18(%rdi), %rdi
0x1000f88f8: movq %rdi, -0x30(%rbp)
0x1000f88fc: movq -0x8(%rbp), %rdi
0x1000f8900: movq 0x20(%rdi), %rdi
0x1000f8904: movq %rdi, -0x38(%rbp)
0x1000f8908: movq -0x8(%rbp), %rdi
0x1000f890c: movq 0x28(%rdi), %rdi
0x1000f8910: movq %rdi, -0x40(%rbp)
0x1000f8914: movq -0x8(%rbp), %rdi
0x1000f8918: movq 0x30(%rdi), %rdi
0x1000f891c: movq %rdi, -0x48(%rbp)
0x1000f8920: movq -0x8(%rbp), %rdi
0x1000f8924: addq $0x50, %rdi
0x1000f892b: movl %eax, -0x54(%rbp)
0x1000f892e: callq 0x1000f7de0 ; mtx_unlock at tct.c:264
0x1000f8933: movq -0x18(%rbp), %rdi ; beginning of inline asm
0x1000f8937: movq -0x20(%rbp), %rcx
0x1000f893b: movq -0x28(%rbp), %rdx
0x1000f893f: movq -0x30(%rbp), %rsi
0x1000f8943: movq -0x38(%rbp), %r8
0x1000f8947: movq -0x40(%rbp), %r9
0x1000f894b: movq -0x48(%rbp), %r10
0x1000f894f: movq %rdi, %rsp
0x1000f8952: movq %rcx, %r15
0x1000f8955: movq %rdx, %r14
0x1000f8958: movq %rsi, %r13
0x1000f895b: movq %r8, %r12
0x1000f895e: movq %r9, %rbx
0x1000f8961: movq %r10, %rbp ; end of inline asm
-> 0x1000f8964: movl %eax, -0x58(%rbp)
0x1000f8967: addq $0x60, %rsp
0x1000f896b: popq %rbp
0x1000f896c: retq
The segfault happens when it tries to access stuff back on the stack, which makes sense because it just switched out the stack. But why is the compiler inserting this? The compiler also stores %eax on the stack at 0x1000f892b. Is the compiler opening up a register? Because it doesn't use %rax in the inline asm. Is there a workaround?
This is using Apple LLVM version 6.0 (clang-600.0.57) on OSX 10.10.2, if that's any help.
Thanks in advance.
I strongly advise you not to write programs that depend on undefined behaviour.
Jumps into and out of inline assembly are not permitted, as the compiler can't analyse control flow it doesn't know about. Upon thread creation you jump into the asm statement from nowhere and then leave it. To avoid these implicit jumps you need to save and restore the registers, including %rip, in the same asm statement.
All registers that an asm statement alters must be listed as outputs or clobbers, for a thread switch routine that is all the registers whose values are not saved, as they are altered by the other threads. If you do not do so the compiler will incorrectly assume that they are not altered.
An asm statement must avoid overwriting its inputs before they are used; in your code there is nothing prohibiting the compiler from storing the variable r12 in the register %r14.
Your lock is either pointless or inadequate.
It is much simpler to write your function entirely in assembly, as in the tutorial you cite.
I am trying to write a function (max) in 64 bit assembly and I don't know what I am doing wrong, maybe some of you guys can determine what I am doing wrong :/
Here's the function:
int max(int a, int b) {
    /* Yields `a` only when it is strictly greater than `b`; in every other
       case, ties included, the result is `b`. */
    if (a > b) {
        return a;
    }
    return b;
}
And here is my assembly code (with comments):
// Hand-written attempt at int max(int a, int b): a arrives in rdi, b in rsi,
// and the result is returned in rax.
// NOTE(review): a and b are 32-bit ints, but every test/cmp below examines the
// full 64-bit register. Whether bit 63 reflects the sign of the int depends on
// what the caller left in the upper halves of rdi/rsi -- confirm, or operate
// on %edi/%esi instead.
push %rbp
mov %rsp, %rbp
mov %rdi, %rax // rax = a
mov %rsi, %rcx // rcx = b
test %rax, %rax // Checking if first parameter is negative
js .signedRAX
test %rcx, %rcx // Checking if second parameter is negative
js .signedRCX
jmp .compare // Neither is negative: jump to .compare
.signedRAX:
test %rcx, %rcx // Checking if both are negative
js .signedRAXandRCX
mov %rcx, %rax // a < 0 <= b: return b, the non-negative number
jmp .end // finish the function
.signedRCX:
jmp .end // If only the second parameter is negative then jump
.signedRAXandRCX: // straight to end of function and return %rax (a)
cmp %rax, %rcx // Both negative: flags from rcx - rax, i.e. b - a
jl .end // b < a: a is already in rax
mov %rcx, %rax // otherwise return b
jmp .end
.compare:
// NOTE(review): cmp %rax, %rcx sets flags from rcx - rax, so jg is taken when
// b > a and the smaller value a is returned; the fall-through returns b when
// b <= a. As written this path yields the minimum, not the maximum.
cmp %rax, %rcx // Both non-negative: flags from b - a
jg .end // taken when b > a, leaving a in rax
mov %rcx, %rax
.end:
mov %rbp, %rsp
pop %rbp
ret
I am getting the wrong output when comparing two parameters that are both signed and then both positive.
You are making this much too complicated.
If I input your program to gcc -S, I get
max:
# Compiler output for max() as 32-bit x86 code: both arguments are read from
# the stack (a at 8(%ebp), b at 12(%ebp)) and the result is returned in eax.
.LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
movl 8(%ebp), %eax # eax = a
cmpl 12(%ebp), %eax # flags from a - b
jle .L2 # a <= b: return b
movl 8(%ebp), %eax # a > b: return a
jmp .L3
.L2:
movl 12(%ebp), %eax # eax = b
.L3:
popl %ebp
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
If I take over your "ABI" and way of passing arguments,
I get
max:
# int max(int a, int b) -- returns the larger argument (b when they are equal).
# ABI:      SysV AMD64, AT&T syntax
# In:       edi = a, esi = b
# Out:      eax = result
# Clobbers: ecx, flags
push %rbp
mov %rsp, %rbp
mov %edi, %eax # eax = a; 32-bit operands because the arguments are C ints
mov %esi, %ecx # ecx = b
cmp %ecx, %eax # flags from a - b (AT&T: second operand minus first)
jg .L2 # a > b: a is already in eax
mov %ecx, %eax # otherwise return b
.L2:
mov %rbp, %rsp
pop %rbp
ret
Here is C-like pseudocode equivalent to the assembly from the question. As you can see, for a >= 0 and b < 0 it returns b, and for a < 0 and b >= 0 it returns a. That's incorrect. There may be other errors in the code, because such a simple operation has been encoded in a really messy way, which makes it hard to see anything in the code. Don't make simple things so complex; follow the KISS principle.
// C-style transcription of the question's assembly, one branch per label
// (a = first argument, b = second argument).
// NOTE(review): the assembly is AT&T syntax, where the destination is the
// last operand: "mov %rcx, %rax" copies b into the return register, and
// "cmp %rax, %rcx" followed by jg branches when rcx > rax. The assignments
// and comparisons below read as if the operands were in the opposite order
// (e.g. "b = a; return b" for "mov %rcx, %rax") -- verify each branch against
// the assembly before relying on conclusions drawn from this transcription.
// test %rax, %rax
// js .signedRAX
if (a >= 0) {
// test %rcx, %rcx
// js .signedRCX
if (b >= 0) {
// .compare
// cmp %rax, %rcx // If both are positive then compare which one is
// jg .end // the max
if (a > b) {
b = a;
}
return b;
} else {
// .signedRCX
return b;
}
} else {
// .signedRAX
// test %rcx, %rcx // Checking if both are signed
// js .signedRAXandRCX
if (b >= 0) {
b = a;
return b;
} else {
// .signedRAXandRCX
// cmp %rax, %rcx // If both are signed compare which one is the max
// jl .end
if (a < b) {
b = a;
}
return b;
}
}