Look at this code. I return an address of the compound literal here.
#include <stdio.h>
#define FOO(bar) ((bar)->a + (bar)->b)
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
int main(void)
{
int baz = FOO((struct bar *) {to_bar(1, 2)});
printf("%d\n", baz);
return 0;
}
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
Output:
3
ISO/IEC 9899 says:
If the compound literal occurs outside the body of a function, the
object has static storage duration; otherwise, it has automatic
storage duration associated with the enclosing block.
I. e., in the to_bar function the unnamed object, created by the compound literal has automatic storage duration. Thereby, it will be destroyed outside scope of to_bar. It seems, this code produces undefined behaviour (based on the standard). Is it so?
You are right. In your example, you immediately retrieved the fields after returning from to_bar, so you didn't have time to corrupt the stack frame of the deceased to_bar function. But here's another example:
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
int main(void)
{
struct bar * corrupted_bar = to_bar(1, 2);
printf("this print will corrupt\n");
int baz = corrupted_bar->a + corrupted_bar->b;
printf("baz = %d\n", baz);
return 0;
}
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
which when executed
this print will corrupt
baz = -59543507
If you look at the assembly
.LC0:
.string "this print will corrupt"
.LC1:
.string "baz = %d\n"
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov esi, 2
mov edi, 1
call to_bar ; call to_bar
mov QWORD PTR [rbp-8], rax ; save address returned to a local pointer
mov edi, OFFSET FLAT:.LC0 ; argument into puts()
call puts ; call puts(), which creates its own local variables that corrupts the bar struct
mov rax, QWORD PTR [rbp-8]
mov edx, DWORD PTR [rax]
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax+4]
add eax, edx
mov DWORD PTR [rbp-12], eax
mov eax, DWORD PTR [rbp-12]
mov esi, eax
mov edi, OFFSET FLAT:.LC1
mov eax, 0
call printf
mov eax, 0
leave
ret
to_bar:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi
mov DWORD PTR [rbp-24], esi
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-8], eax ; field 'a' gets stored, notice dest addr rbp-8 is in the stack frame of this function (local variable)
mov eax, DWORD PTR [rbp-24]
mov DWORD PTR [rbp-4], eax ; field 'b' gets stored, same as above
lea rax, [rbp-8]
pop rbp
ret
Related
I'm trying to get the hang of Assembly for class. So for this C code:
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
b += a;
*aPtr = *aPtr + *bPtr; //dereference
printf(“aPtr points to value: %d\n”, *aPtr);
*** Updated
I tried this in Assembly:
.data
var1 DWORD 10
var2 DWORD 20
var3 DWORD ?
.code
main PROC
mov eax, 10
mov ebx, 20
add ebx, eax
mov var3, ebx
mov eax, offset var1
mov ebx, offset var3
mov ecx, [eax]
mov edx, [ebx]
add ecx, edx
mov var3, ecx
INVOKE ExitProcess, 0
main endp
end
But I know that the pointers can't simply be deferenced and added together like that. We also can't use lea, so I'm at a loss on how to add a dereferenced value to another dereferenced value in Assembly; I'm also not sure how I would convert the printf statement correctly. Could I get some help?
Your code is not yet updating the a and b variables with the results from the operations.
int a = 10;
int b = 20;
int *aPtr = &a;
int *bPtr = &b;
a SDWORD 10
b SDWORD 20
aPtr DWORD offset a
bPtr DWORD offset b
b += a;
mov eax, a
add b, eax ; Result in b (30)
*aPtr = *aPtr + *bPtr;
mov edi, aPtr
mov esi, bPtr
mov eax, [edi]
add eax, [esi]
mov [edi], eax ; Result in a (40)
printf(“aPtr points to value: %d\n”, *aPtr);
msg db 'aPtr points to value: %d\n', 0
...
mov edi, aPtr
mov eax, [edi]
push eax
push offset msg
call _printf
add esp, 8
I'm trying to get the address of the end of a function, so I use __builtin_return_address but it returns a value higher than the next declared funciton, here main:
uintptr_t foo( void )
{
return (uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0));
}
int main( int argc, char **argv )
{
printf("foo:\t%zu\nmain:\t%zu\n", foo(), (uintptr_t)main);
return (0);
}
output:
foo: 94718524985719
main: 94718524985687
But how is it possible ? How could main start address be lower than foo return address ?
__builtin_extract_return_addr(__builtin_return_address(0)) gives the address at which execution continues when the current function returns. In your snippet, this will be somewhere in main since the call to foo is in main.
32 bytes after the start of main makes sense.
When I compiled your code[1], it produced the following assembly and a difference of 26:
foo:
push rbp
mov rbp, rsp
mov rax, QWORD PTR [rbp+8]
pop rbp
ret
.LC0:
.string "foo:\t%lu\nmain:\t%lu\n"
main:
push rbp --- <--- Address of this
mov rbp, rsp |
push rbx |
sub rsp, 24 | 26 bytes
mov DWORD PTR [rbp-20], edi |
mov QWORD PTR [rbp-32], rsi |
mov ebx, OFFSET FLAT:main |
call foo ---
mov rdx, rbx <--- Address of this
mov rsi, rax
mov edi, OFFSET FLAT:.LC0
mov eax, 0
call printf
mov eax, 0
mov rbx, QWORD PTR [rbp-8]
leave
ret
I corrected the format pattern.
#include <inttypes.h>
printf("foo:\t%" PRIuPTR "\nmain:\t%" PRIuPTR "\n", foo(), (uintptr_t)main);
Why is it convention in C and Go to pass a pointer to a variable and change it rather return a new variable with the value?
In C:
#include <stdio.h>
int getValueUsingReturn() {
int value = 42;
return value;
}
void getValueUsingPointer(int* value ) {
*value = 42;
}
int main(void) {
int valueUsingReturn = getValueUsingReturn();
printf("%d\n", valueUsingReturn);
int valueUsingPointer;
getValueUsingPointer(&valueUsingPointer);
printf("%d\n", valueUsingPointer);
return 0;
}
In Go:
package main
import "fmt"
func getValueUsingReturn() int {
value := 42
return value
}
func getValueUsingPointer(value *int) {
*value = 42
}
func main() {
valueUsingReturn := getValueUsingReturn()
fmt.Printf("%d\n", valueUsingReturn)
var valueUsingPointer int
getValueUsingPointer(&valueUsingPointer)
fmt.Printf("%d\n", valueUsingPointer)
}
It there any performance benefits or restrictions in doing one or the other?
First off, I don't know enough about Go to give a judgement on it, but the answer will apply in the case of C.
If you're just working on primitive types like ints, then I'd say there is no performance difference between the two techniques.
When structs come into play, there is a very slight advantage of modifying a variable via pointer (based purely on what you're doing in your code)
#include <stdio.h>
struct Person {
int age;
const char *name;
const char *address;
const char *occupation;
};
struct Person getReturnedPerson() {
struct Person thePerson = {26, "Chad", "123 Someplace St.", "Software Engineer"};
return thePerson;
}
void changeExistingPerson(struct Person *thePerson) {
thePerson->age = 26;
thePerson->name = "Chad";
thePerson->address = "123 Someplace St.";
thePerson->occupation = "Software Engineer";
}
int main(void) {
struct Person someGuy = getReturnedPerson();
struct Person theSameDude;
changeExistingPerson(&theSameDude);
return 0;
}
GCC x86-64 11.2
With No Optimizations
Returning a struct variable through the function's return is slower because the variable has to be "built" by assigning the desired values, after which, the variable is copied to the return value.
When you're modifying a variable by pointer indirection, there is nothing to do except write the desired values to the memory addresses (based off the pointer you passed in)
.LC0:
.string "Chad"
.LC1:
.string "123 Someplace St."
.LC2:
.string "Software Engineer"
getReturnedPerson:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-40], rdi
mov DWORD PTR [rbp-32], 26
mov QWORD PTR [rbp-24], OFFSET FLAT:.LC0
mov QWORD PTR [rbp-16], OFFSET FLAT:.LC1
mov QWORD PTR [rbp-8], OFFSET FLAT:.LC2
mov rcx, QWORD PTR [rbp-40]
mov rax, QWORD PTR [rbp-32]
mov rdx, QWORD PTR [rbp-24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov rax, QWORD PTR [rbp-16]
mov rdx, QWORD PTR [rbp-8]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], rdx
mov rax, QWORD PTR [rbp-40]
pop rbp
ret
changeExistingPerson:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-8], rdi
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], 26
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+8], OFFSET FLAT:.LC0
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+16], OFFSET FLAT:.LC1
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+24], OFFSET FLAT:.LC2
nop
pop rbp
ret
main:
push rbp
mov rbp, rsp
sub rsp, 64
lea rax, [rbp-32]
mov rdi, rax
mov eax, 0
call getReturnedPerson
lea rax, [rbp-64]
mov rdi, rax
call changeExistingPerson
mov eax, 0
leave
ret
With Slight Optimization
However, most compilers today can figure out what you're trying to do here, and will equalize the performance between the two techniques.
If you want to be absolutely stingy, passing pointers is still slightly faster by a few clock cycles at best.
In returning a variable from the function, you still have to at least set the address of the return value.
mov rax, rdi
But in passing the pointer, not even this is done.
But other than that, the two techniques have no performance difference.
.LC0:
.string "Chad"
.LC1:
.string "123 Someplace St."
.LC2:
.string "Software Engineer"
getReturnedPerson:
mov rax, rdi
mov DWORD PTR [rdi], 26
mov QWORD PTR [rdi+8], OFFSET FLAT:.LC0
mov QWORD PTR [rdi+16], OFFSET FLAT:.LC1
mov QWORD PTR [rdi+24], OFFSET FLAT:.LC2
ret
changeExistingPerson:
mov DWORD PTR [rdi], 26
mov QWORD PTR [rdi+8], OFFSET FLAT:.LC0
mov QWORD PTR [rdi+16], OFFSET FLAT:.LC1
mov QWORD PTR [rdi+24], OFFSET FLAT:.LC2
ret
main:
mov eax, 0
ret
I think the short answer to you question (At least for C, I am not familiar with GO internals) is that C functions are pass by value and generally also return by value so data objects must be copied and people worried about the performance of all the copying. For large objects or objects that are complex in their depth (containing pointers to other stuff) it is often just more efficient or logical for the value being copied to be a pointer so the function can "operate" on the data without needing to copy it.
That being said, modern compilers are pretty smart about figuring out stuff like whether the parameter data will fit in registers or efficiently copying returned structures.
Bottom line is for modern C code do what seems best for your application or what is clearest to you. Avoid premature optimization if it detracts from readability at least in the beginning.
Also Compiler Explorer (https://godbolt.org/) is your friend if you want to examine the effect of different styles, especially in light of optimization.
All is in the title, I have an int** as my function argument and I want to display the int with assembly, so what i've done is:
global bar
section .text:
bar:
mov rdx, [rdi]
mov rdi, [rdx]
mov rdx, [rdi]
mov rdi, [rdx]
call rsi
ret
My C code:
#include <stdio.h>
void bar(int **i, void (*f)()); //assembly function prototype
void foo(int i)
{
printf("%d\n", i);
}
int main(void)
{
int i = 3;
int *ptr = &i;
bar(&ptr, &foo);
return (0);
}
It segfaults and says "invalid address" at foo with lldb, so I think it's because i'm not dereferencing the right way, so I'm stuck because I need to do this for a larger function. Thanks for the help.
Let me trace the assembly code:
global bar
section .text:
bar:
mov rdx, [rdi] // rdi = &ptr, rdx = *&ptr = ptr
mov rdi, [rdx] // rdx = ptr, rdi = *ptr = i
mov rdx, [rdi] // rdi = i, rdx = *i = (invalid)
mov rdi, [rdx]
call rsi
ret
This suggests actually you have to pass int****, not int**, as the first argument because you are doing dereference 4 times.
It will be like this:
#include <stdio.h>
void bar(int ****i, void (*f)()); //assembly function prototype
void foo(int i)
{
printf("%d\n", i);
}
int main(void)
{
int i = 3;
int *ptr = &i;
int **pptr = &ptr;
int ***ppptr = &pptr;
bar(&ppptr, &foo);
return (0);
}
Also stack pointer should be 16-byte aligned on function call in x86-64, so the assembly function bar should be (for example):
global bar
section .text:
bar:
mov rdx, [rdi]
mov rdi, [rdx]
mov rdx, [rdi]
mov rdi, [rdx]
sub rsp, 8 // adjust stack pointer
call rsi
add rsp, 8 // restore stack pointer
ret
The alignment is done before call and 8 byte (return address) is pushed by call, so another 8 byte should be subtracted from the function pointer to retain 16-byte alignment.
If you want to use int** as the first argument, do dereferences (memory accesses) only 2 times.
global bar
section .text:
bar:
mov rdx, [rdi]
mov rdi, [rdx]
sub rsp, 8
call rsi
add rsp, 8
ret
Another thing you may want to do is
Create stack frame
Store the argument on the stack memory for later use
global bar
section .text:
bar:
push rbp // create stack frame
mov rbp, rsp
sub rsp, 16 // create region for local variables (note 16-byte alignment)
mov [rbp-8], rdi // save the argument to the memory
mov rdx, [rdi]
mov rdi, [rdx]
mov rdi, [rbp-8] // restore the argument from the memory
mov rdx, [rdi]
mov rdi, [rdx]
call rsi
leave // destruct stack frame
ret
I think you might want this:
bar:
mov rax, [rdi]
mov edi, [rax]
call rsi
ret
which would match the void bar(int **i, void (*f)()) prototype
Say I have a struct defined as follows
struct my_struct
{
int num;
};
....
Here I have a pointer to my_struct and I want to do an increment on num
void foo(struct my_struct* my_ptr)
{
// increment num
// method #1
my_ptr->num++;
// method #2
++(my_ptr->num);
// method #3
my_ptr->++num;
}
Do these 3 ways of incrementing num do the same thing?
While we're at it, is it true that pre-increment is more efficient than post-increment?
Thanks!
First two will have the same effect (when on a line on their own like that), but the third method isn't valid C code (you can't put the ++ there).
As for efficiency, there is no difference. The difference you may have heard people talking about is when, in C++, you increment a non-pointer data type, such as an iterator. In some cases, pre-increment can be faster there.
You can see the generated code using GCC Explorer.
void foo(struct my_struct* my_ptr)
{
my_ptr->num++;
}
void bar(struct my_struct* my_ptr)
{
++(my_ptr->num);
}
Output:
foo(my_struct*): # #foo(my_struct*)
incl (%rdi)
ret
bar(my_struct*): # #bar(my_struct*)
incl (%rdi)
ret
As you can see, there's no difference whatsoever.
The only possible difference between the first two is when you use them in expressions:
my_ptr->num = 0;
int x = my_ptr->num++; // x = 0
my_ptr->num = 0;
int y = ++my_ptr->num; // y = 1
If your only intention is to increment the value of num then the 1st and 2nd method will yield same intented result to the callee method.
However, if you change your code to the following, you can see the difference between the code generated by gcc (assembly level code):
struct my_struct
{
int num;
};
void foo(struct my_struct* my_ptr)
{
printf("\nPost Increment: %d", my_ptr->num++);
}
int main()
{
struct my_struct a;
a.num = 10;
foo(&a);
}
Now compile it using: gcc -masm=intel -S structTest.c -o structTest.s
This asks gcc to generate the assembly code:
Open structTest.s in a text editor.
foo:
.LFB0:
push rbp
mov rbp, rsp
sub rsp, 16
**mov QWORD PTR [rbp-8], rdi**
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax]
mov edx, eax
**lea ecx, [rax+1]**
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], ecx
mov eax, OFFSET FLAT:.LC0
mov esi, edx
mov rdi, rax
mov eax, 0
call printf
leave
ret
.cfi_endproc
main:
.LFB1:
push rbp
mov rbp, rsp
sub rsp, 16
**mov DWORD PTR [rbp-16], 10
lea rax, [rbp-16]
mov rdi, rax
call foo**
leave
ret
.cfi_endproc
And when you change the operation to pre-increment, the follwoing code is generated:
foo:
.LFB0:
.cfi_startproc
push rbp
mov rbp, rsp
sub rsp, 16
**mov QWORD PTR [rbp-8], rdi**
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax]
**lea edx, [rax+1]**
mov rax, QWORD PTR [rbp-8]
**mov DWORD PTR [rax], edx**
mov rax, QWORD PTR [rbp-8]
**mov edx, DWORD PTR [rax]**
mov eax, OFFSET FLAT:.LC0
mov esi, edx
mov rdi, rax
mov eax, 0
call printf
leave
ret
.cfi_endproc
So, you would see that in the second case, the compiler increments the num value and passes on this num value to printf().
In terms of performance, I would expect the post-increment to be more efficient since the memory locations are touched a fewer number of times.
The important lines have been marked between ** in the above code.