QEMU call goes to wrong address

QEMU call goes to wrong address - c

I've been working on a small osdev project. So far i've gotten to running C code with A20, GDT, protected mode (32-bit) and disk loading, but function calls are not working. I've confirmed the actual binary has no problems (ndisasm -b 32 lizard.bin):
... irrelevant bootloader code ...
00000200 8D4C2404 lea ecx,[esp+0x4]
00000204 83E4F0 and esp,byte -0x10
00000207 FF71FC push dword [ecx-0x4]
0000020A 55 push ebp
0000020B 89E5 mov ebp,esp
0000020D 51 push ecx
0000020E 83EC14 sub esp,byte +0x14
00000211 C745F400000000 mov dword [ebp-0xc],0x0
00000218 83EC0C sub esp,byte +0xc
0000021B 8D45F4 lea eax,[ebp-0xc]
0000021E 50 push eax
0000021F E82F000000 call 0x253
00000224 83C410 add esp,byte +0x10
00000227 8945F4 mov [ebp-0xc],eax
0000022A FA cli
0000022B F4 hlt
0000022C 83EC0C sub esp,byte +0xc
0000022F 8D45F4 lea eax,[ebp-0xc]
00000232 50 push eax
00000233 E81B000000 call 0x253
00000238 83C410 add esp,byte +0x10
0000023B 8945F4 mov [ebp-0xc],eax
0000023E 83EC0C sub esp,byte +0xc
00000241 8D45F4 lea eax,[ebp-0xc]
00000244 50 push eax
00000245 E809000000 call 0x253
0000024A 83C410 add esp,byte +0x10
0000024D 8945F4 mov [ebp-0xc],eax
00000250 90 nop
00000251 EBFD jmp short 0x250
00000253 55 push ebp
00000254 89E5 mov ebp,esp
00000256 83EC10 sub esp,byte +0x10
00000259 FA cli
0000025A F4 hlt
0000025B C745FC01000000 mov dword [ebp-0x4],0x1
00000262 8B55FC mov edx,[ebp-0x4]
00000265 89D0 mov eax,edx
00000267 C1E002 shl eax,byte 0x2
0000026A 01D0 add eax,edx
0000026C 8945FC mov [ebp-0x4],eax
0000026F 8B55FC mov edx,[ebp-0x4]
00000272 89D0 mov eax,edx
00000274 C1E003 shl eax,byte 0x3
00000277 29D0 sub eax,edx
00000279 8945FC mov [ebp-0x4],eax
0000027C 836DFC06 sub dword [ebp-0x4],byte +0x6
00000280 8B55FC mov edx,[ebp-0x4]
00000283 89D0 mov eax,edx
00000285 C1E003 shl eax,byte 0x3
00000288 01D0 add eax,edx
0000028A 8945FC mov [ebp-0x4],eax
0000028D 8B4508 mov eax,[ebp+0x8]
00000290 8B55FC mov edx,[ebp-0x4]
00000293 8910 mov [eax],edx
00000295 8B45FC mov eax,[ebp-0x4]
00000298 C9 leave
00000299 C3 ret
The cli & hlt pairs are for debugging with qemu, qemu has not halted on them. As you can see the 3 call instructions are perfectly normal. However running qemu and running info registers produces:
QEMU 6.2.0 monitor - type 'help' for more information
(qemu) info registers
... irrelevant ...
EIP=00007e50 ... irrelevant ...
... irrelevant ...
As you can see, eip is 7e50, the infinite loop! This should not have happened, because there are cli and hlt instructions after the function call (not triggered) and the function (not triggered). If I use gdb, putting a breakpoint on 7e00, the memory address of the kernel, after that continuing and using si sees gdb go into a call to the function, only to have the next instruction be in the infinite loop!
Finally ill provide the files.
Makefile:
PRINTDIRECTORY = --no-print-directory
BOOTLOADER-PARTFILE = int/parts/boot.prt
BOOTLOADER-OBJECTFILE = int/boot.o
BOOTLOADER-SOURCEFILE = src/boot.s
KERNEL-PARTFILE = int/parts/detailed-boot.prt
KERNEL-OBJECTFILE = int/detailed-boot.o
KERNEL-SOURCEFILE = src/detailed-boot.c
GCC = ~/opt/cross/bin/i686-elf-gcc
LD = ~/opt/cross/bin/i686-elf-ld
VM = qemu-system-i386
SYSFILE = lizard.bin
full:
make bootloader $(PRINTDIRECTORY)
make kernel $(PRINTDIRECTORY)
truncate -s 32768 ./int/parts/detailed-boot.prt
make join $(PRINTDIRECTORY)
bootloader:
as -o $(BOOTLOADER-OBJECTFILE) $(BOOTLOADER-SOURCEFILE)
ld -o $(BOOTLOADER-PARTFILE) --oformat binary -e init $(BOOTLOADER-OBJECTFILE) -Ttext 0x7c00
kernel:
$(GCC) -ffunction-sections -ffreestanding $(KERNEL-SOURCEFILE) -o $(KERNEL-OBJECTFILE) -nostdlib -Wall -Wextra -O0
$(LD) -o $(KERNEL-PARTFILE) -Ttext 0x7e00 --oformat binary $(KERNEL-OBJECTFILE) -e main --script=LDfile -O 0 -Ttext-segment 0x7e00
join:
cat $(BOOTLOADER-PARTFILE) $(KERNEL-PARTFILE) > $(SYSFILE)
run:
$(VM) $(SYSFILE)
debug:
$(VM) $(SYSFILE) -gdb tcp:localhost:6000 -S
LDfile:
ENTRY(main)
SECTIONS {
. = 0x7e00;
.text . : { *(.text) }
.data . : { *(.data) }
.bss . : { *(.bss ) }
}
src/detailed-boot.c:
//#include "stdc/stdbool.h"
//#include "stdc/stdio.h"
asm(".code32");
int a(int *d);
int main() {
int c = 0;
c = a(&c);
asm("cli");
asm("hlt");
c = a(&c);
c = a(&c);
while(1);
}
int a(int *d) {
asm("cli");
asm("hlt");
int b = 1;
b *= 5;
b *= 7;
b -= 6;
b *= 9;
*d = b;
return b;
}
//#include "stdc/stdio.c"
src/boot.s:
.code16 # 16 bit mode
.global init # make label init global
init:
call enableA20
reset:
mov $0x00, %ah # 0 = reset drive
mov $0x80, %dl # boot disk
int $0x13
jc reset
load:
mov $0x42, %ah # 42 = extended read
mov $0x8000, %si
xor %bx, %bx
movl $0x00007e00, %ds:4 (%si,1)
movl $0x00400010, %ds:0 (%si,1)
mov %cs, %ds:6 (%si,1)
movl $0x00000001, %ds:8 (%si,1) # start sector in lba
movl $0x00000000, %ds:12(%si,1) # start sector in lba
int $0x13
# 1. Disable interrupts
cli
# 2. Load GDT
lgdt (gdt_descriptor)
# set 32 bit mode
mov %cr0, %eax
or $1, %eax
mov %eax, %cr0
# Far jmp
jmp %cs:(code32)
checkA20:
push %ds
xor %ax, %ax
mov %ax, %ds
movw $0xAA55, %ax
movw $0x7DFE, %bx
movw (%bx), %bx
cmpw %ax, %bx
jnz checkA20_enabled
checkA20_disabled:
xor %ax, %ax
jmp checkA20_done
checkA20_enabled:
xor %ax, %ax
inc %ax
checkA20_done:
pop %ds
ret
enableA20:
call checkA20
jnz enableA20_enabled
enableA20_int15:
mov $0x2403, %ax # A20 gate support
int $0x15
jb enableA20_keyboardController # INT 15 aint supported
cmp $0, %ah
jnz enableA20_keyboardController # INT 15 aint supported
mov $0x2402, %ax # A20 status
int $0x15
jb enableA20_keyboardController # couldnt get status
cmp $0, %ah
jnz enableA20_keyboardController # couldnt get status
cmp $1, %al
jz enableA20_enabled # A20 is activated
mov $0x2401, %ax # A20 activation
int $0x15
jb enableA20_keyboardController # couldnt activate
cmp $0, %ah
jnz enableA20_keyboardController # couldnt activate
enableA20_keyboardController:
call checkA20
jnz enableA20_enabled
cli
call enableA20_wait
mov $0xAD, %al
out %al, $0x64
call enableA20_wait
mov $0xD0, %al
out %al, $64
call enableA20_wait2
in $0x60, %al
push %eax
call enableA20_wait
mov $0xD1, %al
out %al, $0x64
call enableA20_wait
pop %eax
or $2, %al
out %al, $0x60
call enableA20_wait
mov $0xAE, %al
out %al, $0x64
call enableA20_wait
sti
enableA20_fastA20:
call checkA20
jnz enableA20_enabled
in $0x92, %al
test $2, %al
jnz enableA20_postFastA20
or $2, %al
and $0xFE, %al
out %al, $92
enableA20_postFastA20:
call checkA20
jnz enableA20_enabled
cli
hlt
enableA20_enabled:
ret
enableA20_wait:
in $0x64, %al
test $2, %al
jnz enableA20_wait
ret
enableA20_wait2:
in $0x64, %al
test $1, %al
jnz enableA20_wait2
ret
setGDT: ret
# NOTE limit is the length
# NOTE base is the start
# NOTE base + limit = last address
gdt_start:
gdt_null:
# null descriptor
.quad 0
gdt_data:
.word 0x01c8 # limit: bits 0-15
.word 0x0000 # base: bits 0-15
.byte 0x00 # base: bits 16-23
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: no (+0x00)
# direction bit: grows up (+0x00)
# writable bit: writable (+0x02)
# accesed bit [best left 0, cpu will deal with it]: no (+0x00)
.byte 0x80 + 0x10 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
.byte 0x80 + 0x40 # flags: granularity # 4-7 limit: bits 16-19 # 0-3
.byte 0x00 # base: bits 24-31
gdt_code:
.word 0x0100 # limit: bits 0-15
.word 0x8000 # base: bits 0-15
.byte 0x1c # base: bits 16-23
# segment presence: yes (+0x80)
# descriptor priviledge level: ring 0 (+0x00)
# descriptor type: code/data (+0x10)
# executable: yes (+0x08)
# conforming bit [0: only ring 0 can execute this]: no (+0x00)
# readable bit: yes (0x02)
# accessed bit [best left 0, cpu will deal with it]: no (0x00)
.byte 0x80 + 0x10 + 0x08 + 0x02
# granularity flag: limit scaled by 4kib (+0x80)
# size flag: 32 bit pm (+0x40)
# long mode flag: 32pm/16pm/data (+0x00)
# reserved: reserved (+0x00)
.byte 0x80 + 0x40 + 0x00 # flags: granularity # 4-7 limit: bits 16-19 # 0-3
.byte 0x00 # base: bits 24-31
gdt_end:
gdt_descriptor:
.word gdt_end - gdt_start - 1
.long gdt_start
.code32
code32:
mov %ds, %ax
mov %ax, %ds
# mov %ax, %ss
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
movl $0x4000, %ebp
mov %esp, %ebp
push $0x7e00
ret
.fill 500-(.-init)
.quad 1
.word 1
.word 0xaa55
kernel:
I know that this is not a minimum scenario, I apologize.
I'll end this off by giving a link to the github repo: https://github.com/saltq144/lizard
and the cross compiler tutorial i followed: https://wiki.osdev.org/GCC_Cross-Compiler
To address some comments: I have not configured the IDT, an NMI would cause a triple fault or jump to garbage, not the loop. Trying to modify SS caused a triple fault from my limited testing. And I do agree that .code32 in the .c file is pointless, but the cross compiler is i686 so 64-bit code shouldn't be an issue, however i'll look into it.
Note:
Using inline assembly, I am able to insert two nop instructions to allow function calls to work. This is not an ideal solution, but it will have to work until this issue is fully resolved. Compiler optimizations may break this, but they haven't yet.

When you switch to protected mode (at the jmp %cs:(code32)) CS is loaded with "protected mode compatible" information from the GDT.
At the start of code32: all other segment registers contain real mode values. You copy the real mode value that is not compatible with protected mode from DS (at mov %ds, %ax) into all data segment registers. This real mode value from DS is probably 0x0000. In protected mode that refers to the "null descriptor".
This is why you can't do mov %ax, %ss - the CPU will not allow you to use "null descriptor" for the stack segment (it will give you a general protection fault instead). Because you don't load SS with protected mode compatible values, it's left using old values from real mode - an unknown base address, a 64 KiB segment limit, and a 16-bit default stack pointer size.
The consequence of all this is... as soon as you do any normal memory access (e.g. the push dword [ecx-0x4] which uses DS as an implied segment register like [ds: ecx-0x4]) you will get a general protection fault because DS is set to the null descriptor. Because you haven't set up a protected mode IDT the CPU will just use the values real mode was using for its IVT, causing CPU to think unknown trash (that would've been for "interrupt 0x1A" in real mode and not "interupt 0x0D" due to IDT entries being twice as big) is the IDT entry for the general protection fault handler. There's no easy way to predict what happens after that (maybe the unknown trash isn't a valid IDT entry for protected mode and it causes a double fault, maybe it is a "valid enough" IDT entry and you start executing garbage at an unknown address).

Related

What is stack guard page and probing stack?

I'm analysing how the compiler implements the variable-length array in c99. The following is my c code and disassembly which is commented on my understanding. The code is compiled with "-O3 -fomit-frame-pointer -fno-stack-protector -fpie"
c code:
# include<stdio.h>
int main() {
size_t sz; // never be signed
scanf("%zd", &sz);
volatile char s[sz+1]; // prevent to be optimized away.
s[sz] = '\0';
}
disassembly:
Reading symbols from a.out...
(gdb) disass main
Dump of assembler code for function main():
0x0000000000001060 <+0>: endbr64
0x0000000000001064 <+4>: push %rbp # save the current frame pointer.
0x0000000000001065 <+5>: lea 0xf98(%rip),%rdi # rdi = "%zd". 1st param
0x000000000000106c <+12>: xor %eax,%eax # eax = 0.
0x000000000000106e <+14>: mov %rsp,%rbp # set the new frame pointer.
0x0000000000001071 <+17>: sub $0x10,%rsp # allocate a 16 bytes. rsp is aligned by 16.
0x0000000000001075 <+21>: lea -0x8(%rbp),%rsi # rsi = &sz. 2nd param.
0x0000000000001079 <+25>: callq 0x1050 <__isoc99_scanf#plt> # call __isoc99_scanf
# volatile char s[sz+1]; // prevent to be optimized away.
0x000000000000107e <+30>: mov -0x8(%rbp),%rcx # rcx = sz
0x0000000000001082 <+34>: mov %rsp,%rdi # rdi = rsp.
0x0000000000001085 <+37>: lea 0x10(%rcx),%rax # rax = sz + 1 + 15
0x0000000000001089 <+41>: mov %rax,%rdx # rdx = sz + 1 + 15
0x000000000000108c <+44>: and $0xfffffffffffff000,%rax # be mutilple of 4096
0x0000000000001092 <+50>: sub %rax,%rdi # rdi is the address of the array s
0x0000000000001095 <+53>: and $0xfffffffffffffff0,%rdx # be multiple of 16
0x0000000000001099 <+57>: mov %rdi,%rax # rax = &s
0x000000000000109c <+60>: cmp %rax,%rsp # if sz+16 is less than 4096,
0x000000000000109f <+63>: je 0x10b6 <main()+86> # then jump to main+86 for
# the stack is grown as page size for every iteration of the loop.
0x00000000000010a1 <+65>: sub $0x1000,%rsp # grow the stack.
0x00000000000010a8 <+72>: orq $0x0,0xff8(%rsp) # probe stack(???).
0x00000000000010b1 <+81>: cmp %rax,%rsp # if rsp isn't equal to rax,
0x00000000000010b4 <+84>: jne 0x10a1 <main()+65> # then loop.
0x00000000000010b6 <+86>: and $0xfff,%edx # be less than 4096
0x00000000000010bc <+92>: sub %rdx,%rsp # allocate the remainder.
0x00000000000010bf <+95>: test %rdx,%rdx # if the remainder is not zero,
0x00000000000010c2 <+98>: jne 0x10cc <main()+108> # then, jump to probe stack(?).
0x00000000000010c4 <+100>: movb $0x0,(%rsp,%rcx,1) # s[sz] = '\0'
0x00000000000010c8 <+104>: xor %eax,%eax # eax = 0.
0x00000000000010ca <+106>: leaveq # restore the previous stack frame.
0x00000000000010cb <+107>: retq # return 0;
0x00000000000010cc <+108>: orq $0x0,-0x8(%rsp,%rdx,1) # probe stack(??).
0x00000000000010d2 <+114>: jmp 0x10c4 <main()+100> # jump back.
End of assembler dump.
"https://nullprogram.com/blog/2019/10/27/"
says that first, -fomit-frame-pointer is ignored because VLA have to track the stack-frame dynamically. Second, when -fstack-clash-protection is enabled the compiler generates extra code to probe every pages of allocation in case one of those pages is a guard page, etc..
But in my disassembly code, I don't understand these lines:
# the stack is grown as page size for every iteration of the loop.
0x00000000000010a1 <+65>: sub $0x1000,%rsp # grow the stack.
0x00000000000010a8 <+72>: orq $0x0,0xff8(%rsp) # probe stack(???).
0x00000000000010b1 <+81>: cmp %rax,%rsp # if rsp isn't equal to rax,
0x00000000000010b4 <+84>: jne 0x10a1 <main()+65> # then loop.
What does "orq $0x0, 0xff8(%rsp)" mean??. and what is probing stack?

Usually in normal operation stack is accessed sequentially as it grows. OS places guard (marked as non-existing) page at the end of stack space, so when stack overflows process tries to write to protected page and it causes segmentation fault. But when stack grows by more than one page at once stack pointer can jump over guard page and overflow wouldn't be detected, out-of-stack data may be overwritten. Probing every allocated page process ensures stack is not overflowed or segmentation fault occurres on overflow.
With -fstack-clash-protection and array larger than stack limit your program is terminated by segmentation fault:
$ ./stack-protected
16777216
Segmentation fault (core dumped)
With -fno-stack-clash-protection it countinue to work:
$ ./stack-unprotected
16777216
$
but some data out of stack is corrupted.
orq $0x0, 0xff8(%rsp) performs OR operation on 64-bit word in every page with value 0, i.e. writes to page without real data modification.

Return of syscall with wrong file descriptor is not negative [duplicate]

I'm having trouble finding the good documentation for writing 64-bit assembly on MacOS.
The 64-bit SysV ABI says the following in section A.2.1 and this SO post quotes it:
A system-call is done via the syscall instruction. The kernel destroys
registers %rcx and %r11.
Returning from the syscall, register %rax contains the result of the
system-call. A value in the range between -4095 and -1 indicates an error,
it is -errno.
Those two sentences are ok on Linux but are wrong on macOS Sierra with the following code:
global _start
extern _exit
section .text
_start:
; Align stack to 16 bytes for libc
and rsp, 0xFFFFFFFFFFFFFFF0
; Call write
mov rdx, 12 ; size
mov rsi, hello ; buf
mov edi, 1 ; fd
mov rax, 0x2000004 ; write ; replace to mov rax, 0x1 on linux
syscall
jc .err ; Jumps on error on macOS, but why?
jnc .ok
.err:
mov rdi, -1
call _exit ; exit(-1)
.ok:
; Expect rdx to be 12, but it isn't on macOS!
mov rdi, rdx
call _exit ; exit(rdx)
; String for write
section .data
hello:
.str db `Hello world\n`
.len equ $-hello.str
Compile with NASM:
; MacOS: nasm -f macho64 syscall.asm && ld syscall.o -lc -macosx_version_min 10.12 -e _start -o syscall
; Linux: nasm -f elf64 syscall.asm -o syscall.o && ld syscall.o -lc -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o syscall
Run on macOS:
./syscall # Return value 0
./syscall >&- # Return value 255 (-1)
I found out that:
A syscall return errno an sets the carry flag on error, instead of returning -errno in rax
rdx register is clobbered by syscall
On Linux, everything works as expected
Why is rdx clobbered? Why doesn't a syscall return -errno? Where can I find the real documentation?
The only place I found where someone talks about the carry flag for syscall errors is here

I used this:
# as hello.asm -o hello.o
# ld hello.o -macosx_version_min 10.13 -e _main -o hello -lSystem
.section __DATA,__data
str:
.asciz "Hello world!\n"
.section __TEXT,__text
.globl _main
_main:
movl $0x2000004, %eax # preparing system call 4
movl $1, %edi # STDOUT file descriptor is 1
movq str#GOTPCREL(%rip), %rsi # The value to print
movq $13, %rdx # the size of the value to print
syscall
movl %eax, %edi
movl $0x2000001, %eax # exit (return value of the call to write())
syscall
and was able to catch return value into eax. Here return value is the number of bytes actually written by write system call. And yes MacOS being a BSD variant it is the carry flag that tells you if the syscall was wrong or not (errno is just an external linkage variable).
# hello_asm.s
# as hello_asm.s -o hello_asm.o
# ld hello_asm.o -e _main -o hello_asm
.section __DATA,__data
str:
.asciz "Hello world!\n"
good:
.asciz "OK\n"
.section __TEXT,__text
.globl _main
_main:
movl $0x2000004, %eax # preparing system call 4
movl $5, %edi # STDOUT file descriptor is 5
movq str#GOTPCREL(%rip), %rsi # The value to print
movq $13, %rdx # the size of the value to print
syscall
jc err
movl $0x2000004, %eax # preparing system call 4
movl $1, %edi # STDOUT file descriptor is 1
movq good#GOTPCREL(%rip), %rsi # The value to print
movq $3, %rdx # the size of the value to print
syscall
movl $0, %edi
movl $0x2000001, %eax # exit 0
syscall
err:
movl $1, %edi
movl $0x2000001, %eax # exit 1
syscall
This will exits with error code one because descriptor 5 was used, if you try descriptor 1 then it will work printing another message and exiting with 0.

I don't know why rdx gets clobbered, just to confirm that it indeed does seem to get zeroed across the "write" systemcall. I examined the status of every register:
global _start
section .text
_start:
mov rax, 0xDEADBEEF; 0xDEADBEEF = 3735928559; 3735928559 mod 256 = 239
mov rbx, 0xDEADBEEF
mov rcx, 0xDEADBEEF
mov rdx, 0xDEADBEEF
mov rsi, 0xDEADBEEF
mov rdi, 0xDEADBEEF
mov rsp, 0xDEADBEEF
mov rbp, 0xDEADBEEF
mov r8, 0xDEADBEEF
mov r9, 0xDEADBEEF
mov r10, 0xDEADBEEF
mov r11, 0xDEADBEEF
mov r12, 0xDEADBEEF
mov r13, 0xDEADBEEF
mov r14, 0xDEADBEEF
mov r15, 0xDEADBEEF
mov rdx, len2 ; size
mov rsi, msg2 ; buf
mov rdi, 1 ; fd
mov rax, 0x2000004 ; write
syscall
mov rdi, rsi ; CHANGE THIS TO EXAMINE DIFFERENT REGISTERS
mov rax, 0x2000001 ; exit
syscall
section .data
msg_pad db `aaaa\n` ; to make the buffer not to be page-aligned
msg2 db `bbbbbb\n` ; because then it's easier to notice whether
len2 equ $-msg2 ; clobbered or not
nasm -f macho64 syscall.asm && ld syscall.o -e _start -static && ./a.out; echo "status: $?"
The results I got:
clobber list of a "write" syscall
rax clobbered
rbx not clobbered
rcx clobbered
rdx clobbered <- This is the unexpected case?!
rsi not clobbered
rdi not clobbered
rsp not clobbered
rbp not clobbered
r8 not clobbered
r9 not clobbered
r10 not clobbered
r11 clobbered
r12 not clobbered
r13 not clobbered
r14 not clobbered
r15 not clobbered
It would be interesting to know other syscalls zero rdx too, I didn't have the energy to attempt a thorough investigation. But maybe, just to be safe, one should add rdx to the clobber list of all of the MacOS syscalls from now on.

what is meant "seg fs" in the linux kernel bootsect.s file

When I reading the early Linux kernel code, I encountered a problem in boot/bootsect.s that was difficult to understanding."seg fs" ,What is it doing? If I want to change to AT&T's assembly syntax, How should I do!
go: mov ax,cs
mov dx,#0x4000-12 ! 0x4000 is arbitrary value >= length of
! bootsect + length of setup + room for stack
! 12 is disk parm size
! bde - changed 0xff00 to 0x4000 to use debugger at 0x6400 up (bde). We
! wouldn't have to worry about this if we checked the top of memory. Also
! my BIOS can be configured to put the wini drive tables in high memory
! instead of in the vector table. The old stack might have clobbered the
! drive table.
mov ds,ax
mov es,ax
mov ss,ax ! put stack at INITSEG:0x4000-12.
mov sp,dx
/*
* Many BIOS's default disk parameter tables will not
* recognize multi-sector reads beyond the maximum sector number
* specified in the default diskette parameter tables - this may
* mean 7 sectors in some cases.
*
* Since single sector reads are slow and out of the question,
* we must take care of this by creating new parameter tables
* (for the first disk) in RAM. We will set the maximum sector
* count to 18 - the most we will encounter on an HD 1.44.
*
* High doesn't hurt. Low does.
*
* Segments are as follows: ds=es=ss=cs - INITSEG,
* fs = 0, gs = parameter table segment
*/
push #0
pop fs
mov bx,#0x78 ! fs:bx is parameter table address
seg fs
lgs si,(bx) ! gs:si is source
mov di,dx ! es:di is destination
mov cx,#6 ! copy 12 bytes
cld
rep
seg gs
movsw
mov di,dx
movb 4(di),*18 ! patch sector count
seg fs
mov (bx),di
seg fs
mov 2(bx),es
mov ax,cs
mov fs,ax
mov gs,ax
xor ah,ah ! reset FDC
xor dl,dl
int 0x13

I assume it assembles as a fs prefix for the next instruction. That would match the comments, and is the only thing that makes sense.
Should be easy enough to build it and disassemble (into AT&T syntax if you want).
In AT&T syntax, you can just use fs as a prefix to other mnemonics.
fs movsw
assembles to this (in 64-bit mode. 16-bit mode would skip the 66 operand-size prefix).
0000000000000000 <.text>:
0: 64 66 a5 movsw %fs:(%rsi),%es:(%rdi)

who can tell me to change this to be correct or not? !
__go:
movw %cs , %ax
movw $0x4000-12 , %dx
movw %ax , %ds
movw %ax , %es
movw %ax , %ss
movw %dx , %sp
pushw $0x0000
popw %fs
movw $0x0078 , %bx
lgs %fs:(%bx) , %si
movw %dx , %di
movw $0x0006 , %cx
cld
rep
movw %dx , %di
movw $0x12 , 0x0004(%di)
movw %di , %fs:(%bx)
movw %es , %fs:0x0002(%bx)
movw %cs , %ax
movw %ax , %fs
movw %ax , %gs
xorb %ah , %ah
xorb %dl , %dl
int $0x13

Assembly - Error: junk '40' after expression

I'm using an i686-elf-as gcc cross compiler and it's failing to compile an assembly file.
The file is used alongside grub to boot my own operating system but when i try defining any globals or enter the _irq part it spits out tons of errors which are mainly
boot.s:78: Error: no such instruction: `irq4'
boot.s:81: Error: junk `0' after expression
boot.s:82: Error: junk `36' after expression
How would I stop this from happening?
Below is the entire boot.s file
# Declare constants used for creating a multiboot header.
.set ALIGN, 1<<0
.set MEMINFO, 1<<1
.set FLAGS, ALIGN | MEMINFO
.set MAGIC, 0x1BADB002
.set CHECKSUM, -(MAGIC + FLAGS)
.section .multiboot
.align 4
.long MAGIC
.long FLAGS
.long CHECKSUM
.section .bootstrap_stack, "aw", #nobits
stack_bottom:
.skip 16384 # 16 KiB
stack_top:
.section .text
.global _start
.type _start, #function
_start:
movl $stack_top, %esp
call kernel_main
cli
hlt
.Lhang:
jmp .Lhang
.global _irq0
.global _irq1
.global _irq2
.global _irq3
.global _irq4
.global _irq5
.global _irq6
.global _irq7
.global _irq8
.global _irq9
.global _irq10
.global _irq11
.global _irq12
.global _irq13
.global _irq14
.global _irq15
_irq0:
cli
push byte 0
push byte 32
jmp irq_common_stub
_irq1:
cli
push byte 0
push byte 33
jmp irq_common_stub
_irq2:
cli
push byte 0
push byte 34
jmp irq_common_stub
_irq3:
cli
push byte 0
push byte 35
jmp irq_common_stub
_irq4:
cli
push byte 0
push byte 36
jmp irq_common_stub
_irq5:
cli
push byte 0
push byte 37
jmp irq_common_stub
_irq6:
cli
push byte 0
push byte 38
jmp irq_common_stub
_irq7:
cli
push byte 0
push byte 39
jmp irq_common_stub
_irq8:
cli
push byte 0
push byte 40
jmp irq_common_stub
_irq9:
cli
push byte 0
push byte 41
jmp irq_common_stub
_irq10:
cli
push byte 0
push byte 42
jmp irq_common_stub
_irq11:
cli
push byte 0
push byte 43
jmp irq_common_stub
_irq12:
cli
push byte 0
push byte 44
jmp irq_common_stub
_irq13:
cli
push byte 0
push byte 45
jmp irq_common_stub
_irq14:
cli
push byte 0
push byte 46
jmp irq_common_stub
_irq15:
cli
push byte 0
push byte 47
jmp irq_common_stub
extern _irq_handler
irq_common_stub:
pusha
push %ds
push %es
push %fs
push %gs
mov %ax, 0x10
mov %ds, %ax
mov %es, %ax
mov %fs, %ax
mov %gs, %ax
mov %eax, %esp
push %eax
mov %eax, _irq_handler
call eax
pop %eax
pop %gs
pop %fs
pop %es
pop %ds
popa
add %esp, 8
iret
.size _start, . - _start

You're mixing Intel and AT&T syntax assembly language. GNU as uses AT&T syntax traditionally. Intel syntax is that used by assemblers such as NASM, MASM, YASM, and historical assemblers designed for the x86 platform.
movl $stack_top, %esp is a perfectly valid example of AT&T syntax assembly language. push byte 35 is a perfectly valid example of Intel syntax assembly language. The two syntaxes, however, are incompatible, and cannot be combined.
I recommend looking up an assembly language tutorial that uses as on Linux, and learning how to use assembly language in the first place before jumping into something as complex and headache-inducing as systems development. ;)
http://asm.sourceforge.net/ -- Perhaps this tutorial/resource site could be of use to you. Good luck!

Need help understanding Linux kernel's BIOS interrupt calls

I am studying Linux source code to find out how it gets a memory map. I think it starts by calling detect_memory() which is defined here. This function calls detect_memory_e820() which is defined in the same file. detect_memory_e820() at line 48 calls intcall which is defined like this:
.code16gcc
.text
.globl intcall
.type intcall, #function
intcall:
/* Self-modify the INT instruction. Ugly, but works. */
cmpb %al, 3f
je 1f
movb %al, 3f
jmp 1f /* Synchronize pipeline */
1:
/* Save state */
pushfl
pushw %fs
pushw %gs
pushal
/* Copy input state to stack frame */
subw $44, %sp
movw %dx, %si
movw %sp, %di
movw $11, %cx
rep; movsd
/* Pop full state from the stack */
popal
popw %gs
popw %fs
popw %es
popw %ds
popfl
/* Actual INT */
.byte 0xcd /* INT opcode */
3: .byte 0
/* Push full state to the stack */
pushfl
pushw %ds
pushw %es
pushw %fs
pushw %gs
pushal
/* Re-establish C environment invariants */
cld
movzwl %sp, %esp
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
/* Copy output state from stack frame */
movw 68(%esp), %di /* Original %cx == 3rd argument */
andw %di, %di
jz 4f
movw %sp, %si
movw $11, %cx
rep; movsd
4: addw $44, %sp
/* Restore state and return */
popal
popw %gs
popw %fs
popfl
retl
.size intcall, .-intcall
My problem is that I cannot figure out what is the value of dx register at this point: movw %dx, %si and from where it comes.

Notice that the makefile specifies -mregparm=3 for the compilation of 16 bit C code. This instructs the compiler to put the first 3 arguments into the registers eax, edx and ecx if possible. So the value of dx is going to be the second argument, &iregs. Also notice the comment further down that confirms this: /* Original %cx == 3rd argument */
I find it kind of funny that you had no problem right at the start, about how al gets the value of the interrupt number :)

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight