I am new here. I converted some C code to assembly, but the result doesn't look like ordinary assembly code. So my question is: how can I convert C (or C++) code to assembly language so that the converted code can be run on Emu8086?
Here is a simple c code:
#include<stdio.h>
void Hello(){
printf("Hello world");
}
int main (){
Hello();
return 0;
}
Then I converted it with gcc -S test.c and here is the answer:
# Output of `gcc -S` for the Hello/main program: x86-64 code in AT&T syntax.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @PLT, @progbits).
        .file   "test1.c"
        .section        .rodata
.LC0:
        .string "Hello world"
        .text
        .globl  Hello
        .type   Hello, @function
Hello:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        leaq    .LC0(%rip), %rdi        # first argument: address of the string
        movl    $0, %eax                # variadic call: no vector-register arguments
        call    printf@PLT
        nop
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   Hello, .-Hello
        .globl  main
        .type   main, @function
main:
.LFB1:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $0, %eax
        call    Hello
        movl    $0, %eax                # return 0
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE1:
        .size   main, .-main
        .ident  "GCC: (Debian 6.3.0-18+deb9u1) 6.3.0 20170516"
        .section        .note.GNU-stack,"",@progbits
Emu8086 does what it says on the tin: it emulates an Intel 8086 processor. The assembly that GCC has produced is for your host machine (since you haven't told it to do otherwise), which evidently uses the x86-64 instruction set. The 8086 can't understand most of these instructions. You need to cross-compile it to an x86 16-bit real-mode executable. The -m16 option on GCC will generate 16-bit code, but it apparently still uses 32-bit registers (EAX, etc.). So you will have to find a compiler that targets the basic 8086 instruction set.
Related
I use gcc. We know the cpp expands all macros definitions and include statements and passes the result to the actual compiler to create an executable file. I tested the result of cpp and see that some parts of header which are necessary, are included in output of cpp for each source file.
But I want to know: if a project has a header that is included in multiple source files, will the content of that header be duplicated multiple times in the produced executable, or will there be multiple shortcuts to it? If there are shortcuts, why does cpp replace the shortcuts in the source files with more code and pass the result to cc1?
For example in a header, I have a function with name kids
/* Returns the constant 1.
 * NOTE(review): this is a full definition with external linkage placed in a
 * header; if the header is included by more than one source file of the same
 * program, each translation unit would define kids - confirm whether it
 * should be declared `static`/`inline` or moved to a .c file. */
int kids(){
return 1;
}
and call it in the test.c source file. cpp puts the definition of kids in its output. But in the compiled file (test.s, which is the result of cc -S test.c), at the call site I only see call kids rather than a copy of the function definition. I call that a shortcut. I want to know what happens to that function and its calls in the executable.
Edit:
The assembly of test.c is:
# Output of `cc -S test.c`: x86-64 code in AT&T syntax.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @PLT, @progbits).
        .file   "test.c"
        .text
        .globl  kids
        .type   kids, @function
kids:
.LFB0:
        .cfi_startproc
        endbr64
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $1, %eax                # return 1
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   kids, .-kids
        .section        .rodata
.LC0:
        .string "C Rocks + Ali!"
.LC1:
        .string "%d"
        .text
        .globl  main
        .type   main, @function
main:
.LFB1:
        .cfi_startproc
        endbr64
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        leaq    .LC0(%rip), %rdi        # first argument: address of .LC0
        call    puts@PLT
        movl    $0, %eax
        call    kids                    # the definition of kids is emitted once, above
        movl    %eax, %esi              # second printf argument: value returned by kids
        leaq    .LC1(%rip), %rdi        # first printf argument: format string "%d"
        movl    $0, %eax                # variadic call: no vector-register arguments
        call    printf@PLT
        movl    $0, %eax                # return 0
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE1:
        .size   main, .-main
        .ident  "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
        .section        .note.GNU-stack,"",@progbits
        .section        .note.gnu.property,"a"
        .align 8
        .long    1f - 0f
        .long    4f - 1f
        .long    5
0:
        .string  "GNU"
1:
        .align 8
        .long    0xc0000002
        .long    3f - 2f
2:
        .long    0x3
3:
        .align 8
4:
I'm learning to program and sometimes I find that using a variable to return makes my code more readable.
I was wondering if these functions perform the same operations and are equally efficient.
CASE 1:
/* Case 1, variable form: stores the constant expression 5 + 6 + 7 in a
 * local variable and returns that variable. */
int Foo1()
{
int x = 5 + 6 + 7; // Return variable
return x;
}
/* Case 1, direct form: returns the constant expression 5 + 6 + 7 directly.
 * The parameter y is not used in the body. */
int Foo2(int y)
{
return 5 + 6 + 7;
}
In this case I think that the initialization and sum occur at compile time so there's no difference between them.
CASE 2:
/* Case 2, variable form: computes y + 6 + 7 into a local variable and
 * returns that variable. */
int Foo1(int y)
{
int x = y + 6 + 7; // Return variable
return x;
}
/* Case 2, direct form: returns y + 6 + 7 without an intermediate variable. */
int Foo2(int y)
{
return y + 6 + 7;
}
But what happens in this case? It seems that the initialization occurs at execution time, so the program has to perform it.
Is returning the value directly faster than initialize a variable and then returning it? Should I always try to return values directly instead using a variable to return?
You can easily try this yourself.
You can get the assembly from your compiler
Without optimization:
(gcc -S -O0 -o src.S src.c)
# Unoptimized (-O0) x86-64 output, AT&T syntax, for the four example functions.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @progbits).
        .file   "so_temp.c"
        .text
        .globl  case1Foo1
        .type   case1Foo1, @function
case1Foo1:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $18, -4(%rbp)           # store the constant in the local's stack slot
        movl    -4(%rbp), %eax          # reload it as the return value
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   case1Foo1, .-case1Foo1
        .globl  case1Foo2
        .type   case1Foo2, @function
case1Foo2:
.LFB1:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $18, %eax               # return the constant directly
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE1:
        .size   case1Foo2, .-case1Foo2
        .globl  case2Foo1
        .type   case2Foo1, @function
case2Foo1:
.LFB2:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    %edi, -20(%rbp)         # spill the argument to the stack
        movl    -20(%rbp), %eax
        addl    $13, %eax               # argument + 13
        movl    %eax, -4(%rbp)          # store the sum in the local's stack slot
        movl    -4(%rbp), %eax          # reload it as the return value
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE2:
        .size   case2Foo1, .-case2Foo1
        .globl  case2Foo2
        .type   case2Foo2, @function
case2Foo2:
.LFB3:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    %edi, -4(%rbp)          # spill the argument to the stack
        movl    -4(%rbp), %eax
        addl    $13, %eax               # argument + 13, returned directly
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE3:
        .size   case2Foo2, .-case2Foo2
        .ident  "GCC: (Ubuntu 8.3.0-6ubuntu1) 8.3.0"
        .section        .note.GNU-stack,"",@progbits
There you can see that the Foo2 versions have a few fewer instructions than the Foo1 versions of the functions.
With optimization turned to O3:
(gcc -S -O3 -o src.S src.c)
# Optimized (-O3) x86-64 output, AT&T syntax, for the same four functions.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @progbits).
        .file   "so_temp.c"
        .text
        .p2align 4,,15
        .globl  case1Foo1
        .type   case1Foo1, @function
case1Foo1:
.LFB0:
        .cfi_startproc
        movl    $18, %eax               # return 18
        ret
        .cfi_endproc
.LFE0:
        .size   case1Foo1, .-case1Foo1
        .p2align 4,,15
        .globl  case1Foo2
        .type   case1Foo2, @function
case1Foo2:
.LFB5:
        .cfi_startproc
        movl    $18, %eax               # return 18 (same body as case1Foo1)
        ret
        .cfi_endproc
.LFE5:
        .size   case1Foo2, .-case1Foo2
        .p2align 4,,15
        .globl  case2Foo1
        .type   case2Foo1, @function
case2Foo1:
.LFB2:
        .cfi_startproc
        leal    13(%rdi), %eax          # return argument + 13
        ret
        .cfi_endproc
.LFE2:
        .size   case2Foo1, .-case2Foo1
        .p2align 4,,15
        .globl  case2Foo2
        .type   case2Foo2, @function
case2Foo2:
.LFB7:
        .cfi_startproc
        leal    13(%rdi), %eax          # return argument + 13 (same body as case2Foo1)
        ret
        .cfi_endproc
.LFE7:
        .size   case2Foo2, .-case2Foo2
        .ident  "GCC: (Ubuntu 8.3.0-6ubuntu1) 8.3.0"
        .section        .note.GNU-stack,"",@progbits
both versions are exactly the same.
Still I don't think that this is something you should optimize yourself.
In this case readable code should be preferred, especially as code normally isn't compiled with optimizations turned off.
Case 2 is more efficient, but is often not needed as the compiler is extremely likely to optimize case 1 into case 2.
Go for readability if it doesn't hurt performance (as in this case).
Any compiler of at least modest quality will, at even low levels of optimization (such as GCC’s -O1), compile these to the same code. For the most part, any correct optimization you can easily see will be performed by a good compiler.
The C standard does not require compilers to mindlessly compile code into instructions that perform the exact steps in the C source code. It only requires compilers to produce code that has the same effects. Those effects are defined in terms of observable behavior, which includes the output of the program, interactions with the user, and access to volatile objects (special objects you will learn about later). Compilers will eliminate things like intermediate variables as long as they can do so without changing the observable behavior.
I'm working on a 4WD obstacle-avoidance robot in x86 assembly. I can find programs for it that are already written in C, but I can't find any written in x86 assembly.
How do convert these C codes to Assembly x86 code?
The whole part of codes here:
http://www.mertarduino.com/arduino-obstacle-avoiding-robot-car-4wd/2018/11/22/
void compareDistance() // steer toward the side with the longer measured distance
{
  // Left reading is larger: left is less obstructed.
  if (leftDistance > rightDistance) {
    turnLeft();
    return;
  }

  // Right reading is larger: right is less obstructed.
  if (rightDistance > leftDistance) {
    turnRight();
    return;
  }

  // Neither side is larger: both are equally obstructed.
  turnAround();
}
int readPing() { // read the ultrasonic sensor distance
delay(70);
unsigned int uS = sonar.ping();
int cm = uSenter code here/US_ROUNDTRIP_CM;
return cm;
}
How do convert these C codes to Assembly x86 code?
Converting source code to assembly is basically what a compiler does, so just compile it. Most (if not all) compilers have the option of outputting the intermediate assembly code.
If you use gcc -S main.c you will get a file called main.s containing the assembly code.
Here is an example:
$ cat hello.c
#include <stdio.h>
void print_hello() {
puts("Hello World!");
}
int main() {
print_hello();
}
$ gcc -S hello.c
$ cat hello.s
# Output of `gcc -S hello.c`: x86-64 code in AT&T syntax.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @PLT, @progbits).
        .file   "hello.c"
        .text
        .section        .rodata
.LC0:
        .string "Hello World!"
        .text
        .globl  print_hello
        .type   print_hello, @function
print_hello:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        leaq    .LC0(%rip), %rdi        # first argument: address of the string
        call    puts@PLT
        nop
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   print_hello, .-print_hello
        .globl  main
        .type   main, @function
main:
.LFB1:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $0, %eax
        call    print_hello
        movl    $0, %eax                # main returns 0
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE1:
        .size   main, .-main
        .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
        .section        .note.GNU-stack,"",@progbits
How do convert these C codes to Assembly x86 code?
You can use the gcc -m32 -S main.c command to do that, where :
the -S flag indicates that the output must be assembly,
the -m32 flag indicates that you want to produce i386 (32-bit) output.
hello_world.c
#include <stdio.h>
/* Program entry point: prints "Hello World" followed by a newline and
 * returns 0. */
int main()
{
printf("Hello World\n");
return 0;
}
Running gcc hello_world.c -S generates a hello_world.s file in assembly language.
hello_world.s
# Output of `gcc hello_world.c -S`: x86-64 code in AT&T syntax.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @progbits).
        .file   "hello_world.c"
        .section        .rodata
.LC0:
        .string "Hello World"
        .text
        .globl  main
        .type   main, @function
main:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    $.LC0, %edi             # first argument: absolute address of the string
        call    puts                    # the listing calls puts with the newline-free string
        movl    $0, %eax                # return 0
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   main, .-main
        .ident  "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
        .section        .note.GNU-stack,"",@progbits
Is there some way to find out in what type of assembly language the code was generated in (besides knowing the syntax of all assembly languages.)?
Reference for myself or anyone else who didn't know this:
To get your processor architecture run the following:
uname -p
It is the AT&T syntax for the GNU assembler of the target code's CPU by default. There are options to alter that.
Why does gcc take a long time to compile a C code if it has a big array in the extern block?
/* Number of int elements in the file-scope array below. */
#define MAXNITEMS 100000000
/* File-scope array with no initializer; nothing in this file reads or writes it. */
int buff[MAXNITEMS];
/* Program entry point: ignores its arguments and returns 0. */
int main (int argc, char *argv[])
{
return 0;
}
I suspect a bug somewhere. There is no reason for the compile to take longer, no matter how big the array is since the compiler will just write an integer into the .bss segment since you never assign a value to an element in it. Proof:
# Output of `gcc -S` for big.c: x86-64 code in AT&T syntax.
# In x86 GAS the '#' character starts a comment, so symbol modifiers are
# written with '@' (@function, @progbits).
        .file   "big.c"
        # The array appears only as this common-symbol reservation
        # (name, size in bytes, alignment); no element data is emitted.
        .comm   buff,4000000000000000000,32
        .text
        .globl  main
        .type   main, @function
main:
.LFB0:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        movl    %edi, -4(%rbp)          # spill argc
        movq    %rsi, -16(%rbp)         # spill argv
        movl    $0, %eax                # return 0
        popq    %rbp
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc
.LFE0:
        .size   main, .-main
        .ident  "GCC: (Ubuntu/Linaro 4.7.3-1ubuntu1) 4.7.3"
        .section        .note.GNU-stack,"",@progbits
As you can see, the only thing left of the array in the assembly is .comm buff,4000000000000000000,32.
I suggest you run gcc with -S to see the assembler code. Maybe your version of GCC has a bug. I tested with GCC 4.7.3 and the compile times here are the same, no matter which value I use.
Related: Where are static variables stored (in C/C++)?