C to ASM increment causes Segfault - c

I have the following C program:
#include <stdio.h>
/* Prints N and the loop counter i on every pass of the loop.
   With the increment below commented out, i stays 0, the condition i < N
   stays true and main never reaches its return statement. */
int main() {
int i = 0;
int N = 10;
while(i < N) {
printf("counting to %d: %d", N, i);
//i = i + 1;
}
return 0;
}
I would like to compile this first to assembly, then to binary for instructional purposes. So, I issue the following commands:
$ gcc -S count.c -o count.s
$ as -o count.o count.s
$ ld -o count -e main -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/x86_64-linux-gnu/libc.so count.o -lc
These compile the C to assembly, assemble the assembly to binary, and then link the library containing the printf function, respectively.
This works. Output:
counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0counting to 10: 0
etc until I ctrl-c the program.
However, when I uncomment the i = i + 1 line:
Segmentation fault (core dumped)
What is going wrong here?
UPDATE: Here is count.s (with the i = i + 1 line included)
# count.s -- gcc -S output for count.c (x86-64, AT&T syntax, System V AMD64 ABI).
# Stack locals of main: -8(%rbp) = i, -4(%rbp) = N.
    .file "count.c"
    .text
    .section .rodata
.LC0:
    .string "counting to %d: %d"
    .text
    .globl main
    .type main, @function
main:
.LFB0:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    subq $16, %rsp
    movl $0, -8(%rbp)               # i = 0
    movl $10, -4(%rbp)              # N = 10
    jmp .L2                         # test the condition before the first pass
.L3:
    movl -8(%rbp), %edx             # 3rd argument: i
    movl -4(%rbp), %eax
    movl %eax, %esi                 # 2nd argument: N
    leaq .LC0(%rip), %rdi           # 1st argument: format string
    movl $0, %eax                   # variadic call: no vector registers used
    call printf@PLT
    addl $1, -8(%rbp)               # i = i + 1
.L2:
    movl -8(%rbp), %eax
    cmpl -4(%rbp), %eax
    jl .L3                          # loop while i < N (signed compare)
    movl $0, %eax                   # return value 0
    leave
    .cfi_def_cfa 7, 8
    ret                             # pops a return address: needs a caller that pushed one
    .cfi_endproc
.LFE0:
    .size main, .-main
    .ident "GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
    .section .note.GNU-stack,"",@progbits

The below works perfectly fine for me on Ubuntu 20 (taken from Ciro Santilli's answer at Linking a C program directly with ld fails with undefined reference to `__libc_csu_fini`).
gcc -S count.c -o count.s
as -o count.o count.s
ld -o count -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o -L/usr/lib/gcc/x86_64-linux-gnu/4.8/ -lc count.o /usr/lib/x86_64-linux-gnu/crtn.o

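If you are on 64-bit Linux, add an exit system call at the end of the main function instead of returning (with -e main there is no startup code for main to return to):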
mov eax, 60     ; system call number 60 = exit on x86-64 Linux
xor edi, edi    ; first argument: exit status 0
syscall
On 32-bit Linux:
mov eax, 1      ; system call number 1 = exit on 32-bit Linux
xor ebx, ebx    ; first argument: exit status 0
int 0x80

Related

Unexpected behaviour in simple pointer arithmetics in kernel space C code [duplicate]

I am currently following this workbook on building an operating system.
My intention is to write a 64-bit kernel. I have got as far as loading the "kernel" code and writing individual characters to the frame buffer while in text mode.
My problem appears when I add a level of indirection to writing a single character to the frame buffer by wrapping the code in a function. It would appear that the char value passed into the function is being corrupted in some way.
I have three files:
bootloader.asm
; bootloader.asm
; BIOS boot sector (NASM, flat binary). Loads KERNEL_SECTORS sectors from the
; boot disk to KERNEL_OFFSET, enters 32-bit protected mode with a flat GDT and
; calls the code at KERNEL_OFFSET.
[org 0x7c00]
KERNEL_OFFSET equ 0x1000
KERNEL_SECTORS equ 15

    ; org 0x7c00 makes every label assume segment base 0, so set the segment
    ; registers explicitly instead of relying on what the BIOS left in them.
    xor ax, ax
    mov ds, ax                  ; used by lgdt [gdt.descriptor]
    mov es, ax                  ; int 0x13 reads into es:bx
    mov ss, ax                  ; interrupts are inhibited until after the next instruction
    mov sp, 0x9000
    mov bp, sp

    ; load the kernel from boot disk
    ; (dl is left untouched: it still holds the boot drive number)
    mov bx, KERNEL_OFFSET
    mov ah, 0x02                ; bios read sector
    mov al, KERNEL_SECTORS      ; number of sectors to read
    mov ch, 0x00                ; cylinder 0
    mov cl, 0x02                ; read from 2nd sector
    mov dh, 0x00                ; select head 0
    int 0x13
    jc disk_error               ; carry set: the read failed, do not run garbage

    ; switch to protected mode
    cli
    lgdt [gdt.descriptor]
    mov eax, cr0
    or eax, 1                   ; set CR0.PE
    mov cr0, eax
    jmp CODE_SEGMENT:start_protected_mode   ; far jump reloads cs with the code selector

disk_error:
    jmp $                       ; park the CPU

[bits 32]
start_protected_mode:
    mov ax, DATA_SEGMENT        ; every data segment register gets the flat data selector
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ebp, 0x90000
    mov esp, ebp
    call KERNEL_OFFSET
    jmp $                       ; hang if the kernel ever returns

[bits 16]
gdt: ; Super Simple Global Descriptor Table
.start:
.null:
    dd 0x0
    dd 0x0
.code:
    dw 0xffff                   ; limit 15:0
    dw 0x0                      ; base 15:0
    db 0x0                      ; base 23:16
    db 10011010b                ; access: present, ring 0, code, execute/read
    db 11001111b                ; flags: 4 KiB granularity, 32-bit; limit 19:16 = 0xf
    db 0x0                      ; base 31:24
.data:
    dw 0xffff                   ; limit 15:0
    dw 0x0                      ; base 15:0
    db 0x0                      ; base 23:16
    db 10010010b                ; access: present, ring 0, data, read/write
    db 11001111b                ; flags: 4 KiB granularity, 32-bit; limit 19:16 = 0xf
    db 0x0                      ; base 31:24
.end:
.descriptor:
    dw .end - .start - 1        ; the limit field is the table size minus one
    dd .start
CODE_SEGMENT equ gdt.code - gdt.start
DATA_SEGMENT equ gdt.data - gdt.start
times 510-($-$$) db 0
dw 0xaa55
bootkernel.asm
; Kernel entry stub: calls the externally defined main and spins forever if
; main returns.
bits 32
extern main
global _start

_start:
    call main
.hang:
    jmp .hang
kernel.c
// LEGACY MODE VIDEO DRIVER
// NOTE(review): the VGA text-mode frame buffer conventionally starts at
// 0xb8000; 0xb8002 would be the second character cell -- confirm intent.
#define FRAME_BUFFER_ADDRESS 0xb8002
#define GREY_ON_BLACK 0x07
#define WHITE_ON_BLACK 0x0f
/* Stores the byte value at byte offset index from address. */
void write_memory(unsigned long address, unsigned int index, unsigned char value)
{
/* NOTE(review): the pointer is not volatile-qualified; for memory-mapped
   video memory an optimizing compiler may drop the store -- confirm. */
unsigned char * memory = (unsigned char *) address;
memory[index] = value;
}
/* Returns the byte offset of cell (col, row): rows are 80 cells wide and
   each cell takes two bytes. */
unsigned int frame_buffer_offset(unsigned int col, unsigned int row)
{
return 2 * ((row * 80u) + col);
}
/* Writes c at the cell's offset and a at the byte right after it. */
void write_frame_buffer_cell(unsigned char c, unsigned char a, unsigned int col, unsigned int row)
{
unsigned int offset = frame_buffer_offset(col, row);
write_memory(FRAME_BUFFER_ADDRESS, offset, c);
write_memory(FRAME_BUFFER_ADDRESS, offset + 1, a);
}
/* Writes 'A' to cell (col 0, row 1) with two direct write_memory calls, then
   writes 'B' to the same cell (col 0, row 1) through write_frame_buffer_cell. */
void main()
{
unsigned int offset = frame_buffer_offset(0, 1);
write_memory(FRAME_BUFFER_ADDRESS, offset, 'A');
write_memory(FRAME_BUFFER_ADDRESS, offset + 1, GREY_ON_BLACK);
write_frame_buffer_cell('B', GREY_ON_BLACK, 0, 1);
}
The .text section is linked to start from 0x1000 which is where the bootloader expects the kernel to start.
The linker.ld script is
SECTIONS
{
. = 0x1000;
.text : { *(.text) } /* Kernel is expected at 0x1000 */
}
The Make file that puts this all together is:
bootloader.bin: bootloader.asm
nasm -f bin bootloader.asm -o bootloader.bin
bootkernel.o: bootkernel.asm
nasm -f elf64 bootkernel.asm -o bootkernel.o
kernel.o: kernel.c
gcc-6 -Wextra -Wall -ffreestanding -c kernel.c -o kernel.o
kernel.bin: bootkernel.o kernel.o linker.ld
ld -o kernel.bin -T linker.ld bootkernel.o kernel.o --oformat binary
os-image: bootloader.bin kernel.bin
cat bootloader.bin kernel.bin > os-image
qemu: os-image
qemu-system-x86_64 -d guest_errors -fda os-image -boot a
I've taken a screen shot of the output that I am getting. I expect 'A' to appear in the 0th column of the 1st row and for 'B' to appear on the 1st column of the 0th row. For some reason I am getting another character.
Output of gcc-6 -S kernel.c
.file "kernel.c"
.text
# void write_memory(unsigned long address, unsigned int index, unsigned char value)
# 64-bit gcc output (System V AMD64): rdi = address, esi = index, edx = value.
# Leaf function: its locals live below %rsp without adjusting the stack pointer.
    .globl write_memory
    .type write_memory, @function
write_memory:
.LFB0:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    movq %rdi, -24(%rbp)            # spill address
    movl %esi, -28(%rbp)            # spill index
    movl %edx, %eax
    movb %al, -32(%rbp)             # spill value (low byte only)
    movq -24(%rbp), %rax
    movq %rax, -8(%rbp)             # memory = (unsigned char *) address
    movl -28(%rbp), %edx            # index, zero-extended into %rdx
    movq -8(%rbp), %rax
    addq %rax, %rdx                 # &memory[index]
    movzbl -32(%rbp), %eax
    movb %al, (%rdx)                # memory[index] = value
    nop
    popq %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size write_memory, .-write_memory
# unsigned int frame_buffer_offset(unsigned int col, unsigned int row)
# 64-bit gcc output (System V AMD64): edi = col, esi = row; result in eax.
    .globl frame_buffer_offset
    .type frame_buffer_offset, @function
frame_buffer_offset:
.LFB1:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    movl %edi, -4(%rbp)             # spill col
    movl %esi, -8(%rbp)             # spill row
    movl -8(%rbp), %edx
    movl %edx, %eax
    sall $2, %eax                   # row * 4
    addl %edx, %eax                 # row * 5
    sall $4, %eax                   # row * 80
    movl %eax, %edx
    movl -4(%rbp), %eax
    addl %edx, %eax                 # row * 80 + col
    addl %eax, %eax                 # 2 * (row * 80 + col)
    popq %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size frame_buffer_offset, .-frame_buffer_offset
# void write_frame_buffer_cell(unsigned char c, unsigned char a,
#                              unsigned int col, unsigned int row)
# 64-bit gcc output (System V AMD64): dil = c, sil = a, edx = col, ecx = row.
# Locals: -20(%rbp) = c, -24(%rbp) = a, -28(%rbp) = col, -32(%rbp) = row,
#         -4(%rbp) = offset.
    .globl write_frame_buffer_cell
    .type write_frame_buffer_cell, @function
write_frame_buffer_cell:
.LFB2:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    subq $32, %rsp
    movl %esi, %eax
    movl %edx, -28(%rbp)
    movl %ecx, -32(%rbp)
    movb %dil, -20(%rbp)
    movb %al, -24(%rbp)
    movl -32(%rbp), %edx
    movl -28(%rbp), %eax
    movl %edx, %esi                 # 2nd argument: row
    movl %eax, %edi                 # 1st argument: col
    call frame_buffer_offset
    movl %eax, -4(%rbp)             # offset = frame_buffer_offset(col, row)
    movzbl -20(%rbp), %edx          # 3rd argument: c
    movl -4(%rbp), %eax
    movl %eax, %esi                 # 2nd argument: offset
    movl $753666, %edi              # 1st argument: 753666 = 0xb8002 (FRAME_BUFFER_ADDRESS)
    call write_memory
    movzbl -24(%rbp), %eax
    movl -4(%rbp), %edx
    leal 1(%rdx), %ecx              # offset + 1
    movl %eax, %edx                 # 3rd argument: a
    movl %ecx, %esi                 # 2nd argument: offset + 1
    movl $753666, %edi              # 1st argument: 0xb8002
    call write_memory
    nop
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size write_frame_buffer_cell, .-write_frame_buffer_cell
# void main() -- 64-bit gcc output for kernel.c (System V AMD64).
# Local: -4(%rbp) = offset.
    .globl main
    .type main, @function
main:
.LFB3:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    subq $16, %rsp
    movl $1, %esi                   # row = 1
    movl $0, %edi                   # col = 0
    call frame_buffer_offset
    movl %eax, -4(%rbp)             # offset = frame_buffer_offset(0, 1)
    movl -4(%rbp), %eax
    movl $65, %edx                  # 'A'
    movl %eax, %esi
    movl $753666, %edi              # 753666 = 0xb8002 (FRAME_BUFFER_ADDRESS)
    call write_memory
    movl -4(%rbp), %eax
    addl $1, %eax                   # offset + 1
    movl $7, %edx                   # GREY_ON_BLACK
    movl %eax, %esi
    movl $753666, %edi
    call write_memory
    movl $0, %ecx                   # 4th argument
    movl $1, %edx                   # 3rd argument
    movl $7, %esi                   # 2nd argument: GREY_ON_BLACK
    movl $66, %edi                  # 1st argument: 'B'
    call write_frame_buffer_cell
    nop
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE3:
    .size main, .-main
    .ident "GCC: (Ubuntu 6.2.0-3ubuntu11~16.04) 6.2.0 20160901"
    .section .note.GNU-stack,"",@progbits
I can reproduce your exact output if the code is modified to be:
unsigned int offset = frame_buffer_offset(0, 1);
write_memory(FRAME_BUFFER_ADDRESS, offset, 'A');
write_memory(FRAME_BUFFER_ADDRESS, offset + 1, GREY_ON_BLACK);
write_frame_buffer_cell('B', GREY_ON_BLACK, 1, 0);
The difference being in the last line ('B', GREY_ON_BLACK, 1, 0);. Originally you had ('B', GREY_ON_BLACK, 0, 1); . This is in line with what you described you were trying to do when you said:
I've taken a screen shot of the output that I am getting. I expect 'A' to appear in the 0th column of the 1st row and for 'B' to appear on the 1st column of the 0th row.
I gather you may have posted the wrong code in this question. This is the output I get:
It seems you are new to OS development. Your bootloader code only places the CPU into 32-bit protected mode, but to run a 64-bit kernel you need to be in 64-bit longmode. If you are just getting started I'd suggest falling back to writing a 32-bit kernel for purposes of learning at this early stage. At the bottom I have a 64-bit long mode section with a link to a longmode tutorial that could be used to modify your bootloader to run 64-bit code.
Primary Issue Causing Unusual Behaviour
You are experiencing an issue primarily related to the fact that you are generating 64-bit code with GCC but you are running it in 32-bit protected mode according to your bootloader code. 64-bit code generation running in 32-bit protected mode may appear to execute, but it will do it incorrectly. In simple OSes where you are simply displaying to the video display you may often see unexpected output as a side effect. Your program could triple fault the machine, but you got unlucky that the side effect seemed to display something on the video display. You may have been under the false impression that things were working as they should when they really weren't.
This question is somewhat similar to another Stackoverflow question. After the original poster of that question made available a complete example it became clear that it was his issue. Part of my answer to him to resolve the issue was as follows:
Likely Cause of Undefined Behavior
After all the code and the make file were made available in EDIT 2 it became clear that one significant problem was that most of the code was compiled and linked to 64-bit objects and executables. That code won't work in 32-bit protected mode.
In the make file make these adjustments:
When compiling with GCC you need to add -m32 option
When assembling with GNU Assembler (as) targeting 32-bit objects you need to use --32
When linking with LD you need to add the -melf_i386 option
When assembling with NASM targeting 32-bit objects you need to change -f elf64 to -f elf32
With that in mind you can alter your Makefile to generate 32-bit code. It could look like:
bootloader.bin: bootloader.asm
nasm -f bin bootloader.asm -o bootloader.bin
bootkernel.o: bootkernel.asm
nasm -f elf32 bootkernel.asm -o bootkernel.o
kernel.o: kernel.c
gcc-6 -m32 -Wextra -Wall -ffreestanding -c kernel.c -o kernel.o
kernel.bin: bootkernel.o kernel.o linker.ld
ld -melf_i386 -o kernel.bin -T linker.ld bootkernel.o kernel.o --oformat binary
os-image: bootloader.bin kernel.bin
cat bootloader.bin kernel.bin > os-image
qemu: os-image
qemu-system-x86_64 -d guest_errors -fda os-image -boot a
I gather when you started having issues with your code you ended up trying 0xb8002 as the address for your video memory. It should be 0xb8000. You'll need to modify:
#define FRAME_BUFFER_ADDRESS 0xb8002
To be:
#define FRAME_BUFFER_ADDRESS 0xb8000
Making all these changes should resolve your issues. This is what the output I got looked like after the changes mentioned above:
Other observations
In write_memory you use:
unsigned char * memory = (unsigned char *) address;
Since you are using 0xb8000 that is memory mapped to the video display you should mark it as volatile since a compiler could optimize things away not knowing that there is a side effect to writing to that memory (namely displaying characters on a display). You might wish to use:
volatile unsigned char * memory = (unsigned char *) address;
In your bootloader.asm you really should explicitly turn the A20 line on. You can find information about doing that in this OSDev Wiki article. The status of the A20 line at the point a bootloader starts executing may vary between emulators. Failure to turn it on could cause issues if you try to access memory areas on an odd numbered megabyte boundary (like 0x100000 to 0x1fffff, 0x300000 to 0x3fffff, etc.). Accesses to the odd numbered megabyte memory regions will actually read data from the even numbered memory region just below it. This is usually not behaviour you want.
64-bit long mode
If you want to run 64-bit code you will need to place the processor into 64-bit long mode. This is a bit more involved than entering 32-bit protected mode. Information on 64-bit longmode can be found in the OSDev wiki. Once properly in 64-bit longmode you can use 64-bit instructions generated by GCC.

Testing C code by compiling with -s switch? [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 6 years ago.
Improve this question
I want to test my code (I know my code is still incomplete -- yes I am planning to complete it before I compile it) to see if it gives the correct assembly code by compiling with -s switch, how do I do this?
I am not very familiar with compiling. All I did so far was save my file. Now I need to compile it to be able to run it.
typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;
/* Skeleton of a switch over mode_t: every case label is empty and falls
   through to the empty default, so result is returned as initialized (0).
   p1 and p2 are not used yet. */
long switch3 (long *p1, long *p2, mode_t action) {
long result = 0;
switch(action){
case MODE_A:
case MODE_B:
case MODE_C:
case MODE_D:
case MODE_E:
default:; // don't forget the colon
}
return result;
}
Open an editor, Vi or Emacs for example
Type and save your code in a file, maybe main.c
Exit the editor
Type gcc -S main.c or clang -S main.c in the terminal. You can also add the -fverbose-asm flag to tell the compiler to add more information to the output, or the -masm=intel flag to get the assembly output in Intel syntax, which many people find easier to read.
On success, a file named main.s will be generated under the current directory, containing the assembly code; on failure, error messages will be printed on the screen.
Also note that your C code will only be compiled when it's compilable, so you have to modify your code first. At least, change default; to default:;
Here is the assembly code produced by clang -S main.c on my machine:
## clang -S output for switch3 (Mach-O target per .macosx_version_min, x86-64, AT&T syntax).
## Stack slots: -8(%rbp) = p1, -16(%rbp) = p2, -20(%rbp) = action, -32(%rbp) = result.
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 11
.globl _switch3
.align 4, 0x90
_switch3: ## @switch3
.cfi_startproc
## BB#0:
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
movq %rdi, -8(%rbp)
movq %rsi, -16(%rbp)
movl %edx, -20(%rbp)
movq $0, -32(%rbp) ## result = 0
movl -20(%rbp), %edx
subl $4, %edx ## action - 4; the flags feed the ja below (the mov in between leaves them intact)
movl %edx, -36(%rbp) ## 4-byte Spill
ja LBB0_2 ## action > 4 (unsigned): default case
jmp LBB0_1
LBB0_1:
jmp LBB0_2
LBB0_2:
jmp LBB0_3
LBB0_3:
movq -32(%rbp), %rax ## return result
popq %rbp
retq
.cfi_endproc
.subsections_via_symbols
To compile without linking using GNU Compiler Collection (gcc) you can use the -S switch:
jan@jsn-dev:~/src/so> gcc -S main.c
main.c: In function ‘switch3’:
main.c:11:12: error: expected ‘:’ before ‘;’ token
default;
^
After correcting your code with the suggested fix, you get:
jan@jsn-dev:~/src/so> gcc -S main.c
jan@jsn-dev:~/src/so> cat main.s
# main.s -- gcc -S output for switch3 (x86-64, AT&T syntax, System V AMD64 ABI).
# No switch code was emitted: the function stores its arguments and returns result.
    .file "main.c"
    .text
    .globl switch3
    .type switch3, @function
switch3:
.LFB0:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    movq %rdi, -24(%rbp)            # p1
    movq %rsi, -32(%rbp)            # p2
    movl %edx, -36(%rbp)            # action
    movq $0, -8(%rbp)               # result = 0
    movq -8(%rbp), %rax             # return result
    popq %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size switch3, .-switch3
    .ident "GCC: (SUSE Linux) 4.8.3 20140627 [gcc-4_8-branch revision 212064]"
    .section .note.GNU-stack,"",@progbits

Problems with compiled gcc .s Code when linking

First time here. I'm running 64-bit Kali Linux; I'm a Linux rookie and new to ASM as well. So I pulled some C code, which works perfectly fine. Here is the code:
#include<stdio.h>
#include<string.h> //strlen
#include<sys/socket.h>
#include<arpa/inet.h> //inet_addr
/* TCP client: connects to 127.0.0.1 port 2000, sends one message, receives
   one reply into server_reply and prints it.
   Returns 1 if connect or send fails, 0 otherwise. */
int main(int argc , char *argv[])
{
int socket_desc;
struct sockaddr_in server;
char *message , server_reply[2000];
//Create socket
socket_desc = socket(AF_INET , SOCK_STREAM , 0);
if (socket_desc == -1)
{
printf("Could not create socket");
// NOTE(review): there is no return here, so execution continues with socket_desc == -1
}
server.sin_addr.s_addr = inet_addr("127.0.0.1");
server.sin_family = AF_INET;
server.sin_port = htons( 2000 );
//Connect to remote server
if (connect(socket_desc , (struct sockaddr *)&server , sizeof(server)) <0)
{
puts("connect error");
return 1;
}
puts("Connected\n");
//Send some data
message = "Hola!!!!\n\r\n";
if( send(socket_desc , message , strlen(message) , 0) < 0)
{
puts("Send failed");
return 1;
}
puts("Data Send\n");
//Receive a reply from the server
// NOTE(review): recv may fill all 2000 bytes and its return value is not used
// to NUL-terminate server_reply, yet puts below reads up to a NUL byte -- confirm
if( recv(socket_desc, server_reply , 2000 , 0) < 0)
{
puts("recv failed");
}
puts("Reply received\n");
puts(server_reply);
return 0;
}
So I use gcc -S -o example.s example.c to get the ASM code, which is:
.file "test.c"
.section .rodata
.LC0:
.string "Could not create socket"
.LC1:
.string "127.0.0.1"
.LC2:
.string "connect error"
.LC3:
.string "Connected\n"
.align 8
.LC4:
.string "Hola!! , \n\r\n"
.LC5:
.string "Send failed"
.LC6:
.string "Data Send\n"
.LC7:
.string "recv failed"
.LC8:
.string "Reply received\n"
# int main(int argc, char *argv[]) -- gcc -S output for the socket client
# (x86-64, AT&T syntax, System V AMD64 ABI).
# Stack locals: -4(%rbp) = socket_desc, -32(%rbp) = server (family at -32,
# port at -30, address at -28), -16(%rbp) = message,
# -2032(%rbp) = server_reply, -2036(%rbp) = argc, -2048(%rbp) = argv.
    .text
    .globl main
    .type main, @function
main:
.LFB2:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    subq $2048, %rsp
    movl %edi, -2036(%rbp)
    movq %rsi, -2048(%rbp)
    movl $0, %edx
    movl $1, %esi
    movl $2, %edi
    call socket                     # socket(2, 1, 0)
    movl %eax, -4(%rbp)
    cmpl $-1, -4(%rbp)
    jne .L2
    movl $.LC0, %edi
    movl $0, %eax                   # variadic call: no vector registers used
    call printf
.L2:
    movl $.LC1, %edi
    call inet_addr
    movl %eax, -28(%rbp)
    movw $2, -32(%rbp)
    movl $2000, %edi
    call htons
    movw %ax, -30(%rbp)
    leaq -32(%rbp), %rcx
    movl -4(%rbp), %eax
    movl $16, %edx                  # third argument: 16
    movq %rcx, %rsi
    movl %eax, %edi
    call connect
    testl %eax, %eax
    jns .L3                         # result >= 0: connected
    movl $.LC2, %edi
    call puts
    movl $1, %eax                   # return 1
    jmp .L7
.L3:
    movl $.LC3, %edi
    call puts
    movq $.LC4, -16(%rbp)           # message = .LC4
    movq -16(%rbp), %rax
    movq %rax, %rdi
    call strlen
    movq %rax, %rdx                 # length
    movq -16(%rbp), %rsi
    movl -4(%rbp), %eax
    movl $0, %ecx                   # flags = 0
    movl %eax, %edi
    call send
    testq %rax, %rax
    jns .L5                         # result >= 0: sent
    movl $.LC5, %edi
    call puts
    movl $1, %eax                   # return 1
    jmp .L7
.L5:
    movl $.LC6, %edi
    call puts
    leaq -2032(%rbp), %rsi          # server_reply
    movl -4(%rbp), %eax
    movl $0, %ecx                   # flags = 0
    movl $2000, %edx                # buffer size
    movl %eax, %edi
    call recv
    testq %rax, %rax
    jns .L6
    movl $.LC7, %edi
    call puts
.L6:
    movl $.LC8, %edi
    call puts
    leaq -2032(%rbp), %rax
    movq %rax, %rdi
    call puts                       # print server_reply
    movl $0, %eax                   # return 0
.L7:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size main, .-main
    .ident "GCC: (Debian 4.9.2-10) 4.9.2"
So after using as example.s -o example.o, I use ld example.o -o example, and that's where I get the following errors:
ld: warning: cannot find entry symbol _start; defaulting to 00000000004000b0
test.o: In function `main':
test.c:(.text+0x28): undefined reference to `socket'
test.c:(.text+0x40): undefined reference to `printf'
test.c:(.text+0x4a): undefined reference to `inet_addr'
test.c:(.text+0x5d): undefined reference to `htons'
test.c:(.text+0x77): undefined reference to `connect'
test.c:(.text+0x85): undefined reference to `puts'
test.c:(.text+0x99): undefined reference to `puts'
test.c:(.text+0xad): undefined reference to `strlen'
test.c:(.text+0xc3): undefined reference to `send'
test.c:(.text+0xd2): undefined reference to `puts'
test.c:(.text+0xe3): undefined reference to `puts'
test.c:(.text+0xfe): undefined reference to `recv'
test.c:(.text+0x10d): undefined reference to `puts'
test.c:(.text+0x117): undefined reference to `puts'
test.c:(.text+0x126): undefined reference to `puts'
It seems to me that gcc is not correctly handling _start, .globl main, etc., but to be honest I wouldn't know how to fix it. If this is correct, then why?
Any help Will be appreciate.
Thank you.
The problem is that ld example.o -o example tries to link just example.o and nothing else. To get missing symbols you need to link much more (e.g. startup code, standard library, C runtime, etc). Try gcc -v example.c to see how the linker should be invoked.
The commands given in Harry's answer are the good ones:
gcc -Wall -O -fverbose-asm -S example.c
gcc -c example.s -o example.o
gcc example.o -o example
Basically, you should be aware that GCC would link your code with :
startup code like crt0 (actually, that is several object files today)
the C standard library (libc.so) (which will do some system calls)
the libgcc providing a few low level, processor specific, functions (e.g. 64 bits arithmetic on 32 bits machine); it has a permissive but ad-hoc license.
and you often need some dynamic linker like ld-linux(8)
the kernel would provide vdso(7)
How all this is linked together is known by the gcc command, which will start some ld. Replace gcc with gcc -v in your compilation commands to understand what exactly is happening. If you want to issue your own ld command you should add the options providing what I have listed above. The errors you are getting are notably because of the lack of crt0 & libc
BTW on Linux most C standard libraries (e.g. GNU libc or musl-libc) are free software (and so is GCC), so you can study their source code.
Try also gcc -dumpspecs which describes what gcc knows about issuing various commands (notice that gcc is only a driving program; the real C compiler is some cc1). Read also the wikipage on GCC. Some slides and references on the documentation of GCC MELT gives a lot more information. See also this and the picture there.
I strongly recommend to also use gcc to assemble (some assembler code of yours) and to link stuff (because you don't want to handle all the gory details mentioned above, plus some other ones I did not mention).
Try this
gcc -Wall -O -fverbose-asm -S example.c
gcc -c example.s -o example.o
gcc example.o -o example
This is an important part:
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crt1.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crti.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/crtbegin.o
-lgcc
--as-needed -lgcc_s
--no-as-needed -lc -lgcc
--as-needed -lgcc_s
--no-as-needed /usr/lib/gcc/x86_64-linux-gnu/4.9/crtend.o
/usr/lib/gcc/x86_64-linux-gnu/4.9/../../../x86_64-linux-gnu/crtn.o
crt1, crti and crtbegin supply the startup code where the _start entry point is actually defined (later on, control is passed to your main), stdio is initialized, etc. Similarly, crtend and crtn handle the cleanup after main returns. -lc supplies the standard library (puts and the other missing symbols). -lgcc and -lgcc_s provide the gcc-specific runtime support.
The bottomline is, you need all that to be linked in.

Incrementing a variable through embedded assembly language

I am trying to understand how to embed assembly language in C (using gcc on x86_64 architecture). I wrote this program to increment the value of a single variable, but I am getting a garbage value as output. Any ideas why?
#include <stdio.h>
/* Tries to increment x with an inline-assembly incl and prints the result. */
int main(void) {
int x;
x = 4;
/* NOTE(review): "=r"(x) is a write-only output (%0) and "r0"(x) is a separate
   input operand; incl %0 only modifies the output operand, so the result
   depends on the compiler giving both the same register -- confirm. */
asm("incl %0": "=r"(x): "r0"(x));
printf("%d", x);
return 0;
}
Thanks
Update The program is giving expected result on gcc 4.8.3 but not on gcc 4.6.3. I am pasting the assembly output of the non-working code:
# abc.s -- gcc 4.6.3 output for the inline-asm increment (x86-64, AT&T syntax).
# Local: -20(%rbp) = x. The listing shows the failure: x is loaded into %eax,
# but the incl operates on %edx, which was never loaded with x.
    .file "abc.c"
    .section .rodata
.LC0:
    .string "%d"
    .text
    .globl main
    .type main, @function
main:
.LFB0:
    .cfi_startproc
    pushq %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq %rbx
    subq $24, %rsp
    movl $4, -20(%rbp)              # x = 4
    movl -20(%rbp), %eax            # input operand: x in %eax
    incl %edx                       # the asm statement: %0 was allocated to %edx
    movl %edx, %ebx
    .cfi_offset 3, -24
    movl %ebx, -20(%rbp)            # x = output operand
    movl $.LC0, %eax
    movl -20(%rbp), %edx
    movl %edx, %esi                 # 2nd argument: x
    movq %rax, %rdi                 # 1st argument: format string
    movl $0, %eax                   # variadic call: no vector registers used
    call printf
    movl $0, %eax                   # return 0
    addq $24, %rsp
    popq %rbx
    popq %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size main, .-main
    .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
    .section .note.GNU-stack,"",@progbits
You don't need to say x twice; once is sufficient:
asm("incl %0": "+r"(x));
The +r says that the value will be input and output.
Your way, with separate inputs and output registers, requires that you take the input from %1, add one, and write the output to %0, but you can't do that with incl.
The reason it works on some compilers is because GCC is free to allocate both %0 and %1 to the same register, and appears to have done so in those cases, but it does not have to. Incidentally, if you want to prevent GCC allocating an input and output to the same register (say, if you want to initialize the output before using the input to calculate a final output), you need to use the & modifier.
The documentation for the modifiers is here.

Creating a directory in linux assembly language

I am trying to create a small assembly program to create a folder. I looked up the system call for creating a directory on this page. It says that it is identified by 27h. How would I go about implementing the mkdir somename in assembly?
I am aware that the program should move 27 into eax but I am unsure where to go next. I have googled quite a bit and no one seems to have posted anything about this online.
This is my current code (I don't know in which register to put filename and so on):
section .data
section .text
global _start
mov eax, 27
mov ????????
....
int 80h
Thanks
One way of finding out, is using GCC to translate the following C code:
#include <stdio.h>
#include <sys/stat.h>
/* Calls mkdir("testdir", 0777); returns -1 if the call reports failure
   (non-zero result), 0 otherwise. */
int main()
{
if (mkdir("testdir", 0777) != 0)
{
return -1;
}
return 0;
}
to assembly, with: gcc mkdir.c -S
# mkdir.s -- gcc -S output for mkdir.c (32-bit x86, AT&T syntax).
# Both mkdir arguments are passed on the stack.
    .file "mkdir.c"
    .section .rodata
.LC0:
    .string "testdir"
    .text
    .globl main
    .type main, @function
main:
.LFB0:
    .cfi_startproc
    pushl %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl %esp, %ebp
    .cfi_def_cfa_register 5
    andl $-16, %esp                 # align the stack to 16 bytes
    subl $16, %esp
    movl $511, 4(%esp)              # 2nd argument: 511 = 0777
    movl $.LC0, (%esp)              # 1st argument: "testdir"
    call mkdir                      # interesting call
    testl %eax, %eax
    setne %al
    testb %al, %al
    je .L2                          # mkdir returned 0
    movl $-1, %eax                  # return -1
    jmp .L3
.L2:
    movl $0, %eax                   # return 0
.L3:
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE0:
    .size main, .-main
    .ident "GCC: (GNU) 4.5.1 20100924 (Red Hat 4.5.1-4)"
    .section .note.GNU-stack,"",@progbits
Anyway, ProgrammingGroundUp page 272 lists important syscalls, including mkdir:
%eax Name %ebx %ecx %edx Notes
------------------------------------------------------------------
39 mkdir NULL terminated Permission Creates the given
directory name directory. Assumes all
directories leading up
to it already exist.
You could also do like the Assembly Howto is suggesting. But indeed, calling mkdir from Libc is more portable. You need to look into asm/unistd.h to get the syscall number.

Resources