Let's say I have a function like this at a certain memory address (e.g. 0x643795):
; Reads a pointer from [esp+4], follows the pointer stored at +19Ch of that object,
; clears one byte in the target and copies one dword field into it.
mov ecx, [esp+4] ; ecx = dword at esp+4 (presumably the first stack argument - confirm the calling convention)
mov eax, [ecx+19Ch] ; eax = pointer stored at ecx+19Ch
and byte ptr [eax+24h], 0 ; AND with 0 clears the byte at eax+24h
mov ecx, [ecx+60h] ; ecx = dword at (previous ecx)+60h
mov [eax+2Ch], ecx ; store that dword at eax+2Ch
retn
Now I would like to know whether there is any way to replace the function at this address with, for example, something like this:
mov ecx, [esp+8] ; ecx = dword at esp+8 (presumably the second stack argument - confirm)
mov ecx, [ecx+19Ch] ; overwrite ecx with the dword stored at ecx+19Ch
retn
That way I would be able to replace the function at a given address with my own code (I'm mainly interested in a C solution).
Here is a C solution, but be careful: do not forget about nop alignment:
#include <windows.h>
// Replacement machine code that main() copies over the target function.
// It is declared naked so that the body consists only of the instructions
// written below (no compiler-generated prologue or epilogue).
__declspec(naked) void code()
{
_asm
{
mov ecx, dword ptr [esp + 8] ; ecx = dword at esp+8
mov ecx, dword ptr [ecx + 19Ch] ; ecx = dword stored at ecx+19Ch
retn
}
}
// End marker: main() computes the payload size as &code_end - &code.
// NOTE(review): this relies on code_end being placed directly after code in
// the final image - confirm for your compiler/linker settings.
__declspec(naked) void code_end()
{
_asm{nop}
}
// Demonstrates overwriting the start of an existing function (MessageBoxA in
// user32.dll) with the bytes of code(), then calling the patched address.
//
// Returns 0 on success, 1 if the target could not be located or made writable.
int main()
{
    // The payload loads a DWORD from (second argument + 19Ch), so the second
    // argument must point to at least 19Ch + 4 readable bytes. Passing NULL
    // (as MessageBoxA(0, 0, 0, 0) did) makes it read address 19Ch and fault.
    static BYTE dummy[0x19C + sizeof(DWORD)];
    // The payload ends with a plain `retn` and does not pop its arguments, so
    // it must be called as __cdecl; calling it through the stdcall MessageBoxA
    // prototype would leave the stack unbalanced.
    typedef void (__cdecl *patched_fn)(void *, void *);
    SIZE_T code_size = (SIZE_T)((DWORD_PTR)&code_end - (DWORD_PTR)&code);
    HMODULE user32 = LoadLibraryA("user32.dll");
    LPVOID MesBox_addr;
    DWORD dwOld;

    if (user32 == NULL)
        return 1;

    MesBox_addr = (LPVOID)GetProcAddress(user32, "MessageBoxA");
    if (MesBox_addr == NULL)
        return 1;

    // Code pages are normally not writable; open the range up for the copy.
    if (!VirtualProtect(MesBox_addr, code_size, PAGE_EXECUTE_READWRITE, &dwOld))
        return 1;

    CopyMemory(MesBox_addr, (LPVOID)&code, code_size);

    // Put the original protection back and make sure the CPU does not execute
    // stale bytes from the instruction cache.
    VirtualProtect(MesBox_addr, code_size, dwOld, &dwOld);
    FlushInstructionCache(GetCurrentProcess(), MesBox_addr, code_size);

    ((patched_fn)MesBox_addr)(NULL, dummy);
    return 0;
}
Related
Why is it convention in C and Go to pass a pointer to a variable and change it rather return a new variable with the value?
In C:
#include <stdio.h>
// Hands the value 42 back to the caller through the normal return path.
int getValueUsingReturn() {
    int answer = 42;
    return answer;
}
// Stores 42 in the int that `value` points to; the caller provides the storage.
void getValueUsingPointer(int* value ) {
    const int answer = 42;
    *value = answer;
}
// Obtains the value both ways and prints each result on its own line.
int main(void) {
    int viaPointer;
    int viaReturn = getValueUsingReturn();

    printf("%d\n", viaReturn);

    getValueUsingPointer(&viaPointer);
    printf("%d\n", viaPointer);

    return 0;
}
In Go:
package main
import "fmt"
func getValueUsingReturn() int {
value := 42
return value
}
func getValueUsingPointer(value *int) {
*value = 42
}
// main obtains the value both ways and prints each result on its own line.
func main() {
	var viaPointer int
	viaReturn := getValueUsingReturn()

	fmt.Printf("%d\n", viaReturn)

	getValueUsingPointer(&viaPointer)
	fmt.Printf("%d\n", viaPointer)
}
Are there any performance benefits or restrictions in doing one or the other?
First off, I don't know enough about Go to give a judgement on it, but the answer will apply in the case of C.
If you're just working on primitive types like ints, then I'd say there is no performance difference between the two techniques.
When structs come into play, there is a very slight advantage of modifying a variable via pointer (based purely on what you're doing in your code)
#include <stdio.h>
// Record used by both examples: one int followed by three string pointers.
struct Person {
int age;
const char *name;
const char *address;
const char *occupation;
};
// Builds a Person in a local variable and returns it by value.
struct Person getReturnedPerson() {
struct Person thePerson = {26, "Chad", "123 Someplace St.", "Software Engineer"};
return thePerson; // the caller receives a copy of the local
}
// Fills in the caller's Person through the pointer; nothing is returned.
void changeExistingPerson(struct Person *thePerson) {
thePerson->age = 26;
thePerson->name = "Chad";
thePerson->address = "123 Someplace St.";
thePerson->occupation = "Software Engineer";
}
// Exercises both variants so that their generated code can be compared.
int main(void) {
struct Person someGuy = getReturnedPerson(); // filled via the return value
struct Person theSameDude;
changeExistingPerson(&theSameDude); // filled in place via the pointer
return 0;
}
GCC x86-64 11.2
With No Optimizations
Returning a struct variable through the function's return is slower because the variable has to be "built" by assigning the desired values, after which, the variable is copied to the return value.
When you're modifying a variable by pointer indirection, there is nothing to do except write the desired values to the memory addresses (based off the pointer you passed in)
.LC0:
.string "Chad"
.LC1:
.string "123 Someplace St."
.LC2:
.string "Software Engineer"
; Unoptimized: the struct is first built in a local at [rbp-32], then copied
; to the destination whose address arrived in rdi.
getReturnedPerson:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-40], rdi ; save the destination address passed in rdi
mov DWORD PTR [rbp-32], 26 ; build the local: the int 26 first ...
mov QWORD PTR [rbp-24], OFFSET FLAT:.LC0 ; ... then the three string addresses
mov QWORD PTR [rbp-16], OFFSET FLAT:.LC1
mov QWORD PTR [rbp-8], OFFSET FLAT:.LC2
mov rcx, QWORD PTR [rbp-40] ; rcx = saved destination address
mov rax, QWORD PTR [rbp-32] ; copy the 32-byte local to [rcx], 8 bytes at a time
mov rdx, QWORD PTR [rbp-24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov rax, QWORD PTR [rbp-16]
mov rdx, QWORD PTR [rbp-8]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], rdx
mov rax, QWORD PTR [rbp-40] ; rax = destination address on return
pop rbp
ret
; Unoptimized: the pointer argument is spilled once and reloaded before every
; store; the values are written straight into the pointed-to struct.
changeExistingPerson:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-8], rdi ; spill the pointer argument
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], 26 ; offset 0: the int 26
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+8], OFFSET FLAT:.LC0 ; offset 8: address of "Chad"
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+16], OFFSET FLAT:.LC1 ; offset 16: address of "123 Someplace St."
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax+24], OFFSET FLAT:.LC2 ; offset 24: address of "Software Engineer"
nop
pop rbp
ret
; Unoptimized main: both callees receive the address of a stack slot in rdi.
main:
push rbp
mov rbp, rsp
sub rsp, 64 ; room for two 32-byte structs, at [rbp-32] and [rbp-64]
lea rax, [rbp-32]
mov rdi, rax ; rdi = address where the returned struct is to be written
mov eax, 0
call getReturnedPerson
lea rax, [rbp-64]
mov rdi, rax ; rdi = address of the second struct (the pointer argument)
call changeExistingPerson
mov eax, 0 ; return value 0
leave
ret
With Slight Optimization
However, most compilers today can figure out what you're trying to do here, and will equalize the performance between the two techniques.
If you want to be absolutely stingy, passing pointers is still slightly faster by a few clock cycles at best.
In returning a variable from the function, you still have to at least set the address of the return value.
mov rax, rdi
But in passing the pointer, not even this is done.
But other than that, the two techniques have no performance difference.
.LC0:
.string "Chad"
.LC1:
.string "123 Someplace St."
.LC2:
.string "Software Engineer"
; Optimized: the values are stored directly through rdi; no local copy is built.
getReturnedPerson:
mov rax, rdi ; the only extra instruction compared with changeExistingPerson
mov DWORD PTR [rdi], 26
mov QWORD PTR [rdi+8], OFFSET FLAT:.LC0
mov QWORD PTR [rdi+16], OFFSET FLAT:.LC1
mov QWORD PTR [rdi+24], OFFSET FLAT:.LC2
ret
; Optimized: four stores through the pointer in rdi, nothing else.
changeExistingPerson:
mov DWORD PTR [rdi], 26
mov QWORD PTR [rdi+8], OFFSET FLAT:.LC0
mov QWORD PTR [rdi+16], OFFSET FLAT:.LC1
mov QWORD PTR [rdi+24], OFFSET FLAT:.LC2
ret
; Optimized main: neither function is called here any more; only the
; return value 0 remains.
main:
mov eax, 0
ret
I think the short answer to your question (at least for C; I am not familiar with Go internals) is that C functions pass by value and generally also return by value, so data objects must be copied, and people worried about the performance of all that copying. For large objects, or objects that are complex in their depth (containing pointers to other stuff), it is often just more efficient or logical for the value being copied to be a pointer, so the function can "operate" on the data without needing to copy it.
That being said, modern compilers are pretty smart about figuring out stuff like whether the parameter data will fit in registers or efficiently copying returned structures.
Bottom line is for modern C code do what seems best for your application or what is clearest to you. Avoid premature optimization if it detracts from readability at least in the beginning.
Also Compiler Explorer (https://godbolt.org/) is your friend if you want to examine the effect of different styles, especially in light of optimization.
Look at this code. I return an address of the compound literal here.
#include <stdio.h>
#define FOO(bar) ((bar)->a + (bar)->b)
// Pair of ints whose sum the FOO macro computes.
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
// Sums the two fields of the struct returned by to_bar(1, 2) and prints the result.
int main(void)
{
int baz = FOO((struct bar *) {to_bar(1, 2)}); // a compound literal of type struct bar *, initialized with to_bar's result
printf("%d\n", baz);
return 0;
}
// Returns the address of a struct bar compound literal created inside this
// function body.
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
Output:
3
ISO/IEC 9899 says:
If the compound literal occurs outside the body of a function, the
object has static storage duration; otherwise, it has automatic
storage duration associated with the enclosing block.
I. e., in the to_bar function the unnamed object, created by the compound literal has automatic storage duration. Thereby, it will be destroyed outside scope of to_bar. It seems, this code produces undefined behaviour (based on the standard). Is it so?
You are right. In your example, you immediately retrieved the fields after returning from to_bar, so you didn't have time to corrupt the stack frame of the deceased to_bar function. But here's another example:
// Pair of ints returned (by address) from to_bar.
struct bar {
int a;
int b;
};
static struct bar * to_bar(int a, int b);
// Keeps the pointer returned by to_bar, makes another call, and only then
// reads the fields through the pointer.
int main(void)
{
struct bar * corrupted_bar = to_bar(1, 2);
printf("this print will corrupt\n"); // runs between obtaining the pointer and using it
int baz = corrupted_bar->a + corrupted_bar->b; // reads through the pointer after to_bar has returned
printf("baz = %d\n", baz);
return 0;
}
// Returns the address of a compound literal that lives in this function's
// own block (see the stores to [rbp-8] / [rbp-4] in the listing).
static struct bar *
to_bar(int a, int b)
{
return &(struct bar) {a, b};
}
which when executed
this print will corrupt
baz = -59543507
If you look at the assembly
.LC0:
.string "this print will corrupt"
.LC1:
.string "baz = %d\n"
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov esi, 2 ; second argument: b = 2
mov edi, 1 ; first argument: a = 1
call to_bar ; call to_bar
mov QWORD PTR [rbp-8], rax ; save address returned to a local pointer
mov edi, OFFSET FLAT:.LC0 ; argument into puts()
call puts ; call puts(), which creates its own local variables that corrupts the bar struct
mov rax, QWORD PTR [rbp-8] ; reload the saved pointer
mov edx, DWORD PTR [rax] ; edx = field 'a' read through it
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax+4] ; eax = field 'b' read through it
add eax, edx ; a + b
mov DWORD PTR [rbp-12], eax ; store the sum in the local baz
mov eax, DWORD PTR [rbp-12]
mov esi, eax ; second printf argument: baz
mov edi, OFFSET FLAT:.LC1 ; first printf argument: the format string
mov eax, 0
call printf
mov eax, 0 ; return value 0
leave
ret
to_bar:
push rbp
mov rbp, rsp
mov DWORD PTR [rbp-20], edi ; spill argument a
mov DWORD PTR [rbp-24], esi ; spill argument b
mov eax, DWORD PTR [rbp-20]
mov DWORD PTR [rbp-8], eax ; field 'a' gets stored, notice dest addr rbp-8 is in the stack frame of this function (local variable)
mov eax, DWORD PTR [rbp-24]
mov DWORD PTR [rbp-4], eax ; field 'b' gets stored, same as above
lea rax, [rbp-8] ; return the address of that local storage
pop rbp
ret
I'm running into the following error for my _asm code in Visual Studio:
"First-chance exception at 0x00431BCF in HW7.exe: 0xC0000005: Access violation writing location 0x00000000."
#include <stdio.h>
#include <stddef.h>
// Function to implement strncpy() function
// Copies characters from source to destination until either a '\0' is read
// from source or num characters have been copied, then writes a terminating
// '\0' after the copied characters.
//
//   destination - buffer to write to; must have room for the copied
//                 characters plus the terminator. NULL is rejected.
//   source      - NUL-terminated string to read from.
//   num         - maximum number of characters to copy.
//
// Returns the original value of destination, or NULL if destination is NULL.
//
// Why the earlier version crashed: this function is not declared naked, so the
// compiler already sets up a stack frame for it, and the names destination /
// source / num inside the _asm block are resolved relative to that frame's ebp.
// The asm block used to build a second frame of its own (push ebp / mov ebp,
// esp / sub esp, ...), after which those names no longer referred to the real
// arguments, and it left through "ret 0", skipping the compiler's epilogue.
// The block below therefore contains only the copy loop, uses only the scratch
// registers eax, ecx and edx, and leaves the frame and the return to the compiler.
char* strncpy(char* destination, const char* source, size_t num)
{
    // take a pointer pointing to the beginning of destination string
    char* ptr = destination;

    // return if no memory is allocated to the destination
    if (destination == NULL)
        return NULL;

    _asm {
    copy_loop:
        mov ecx, DWORD PTR[source]
        mov dl, BYTE PTR[ecx]               ; dl = *source
        test dl, dl
        je terminate                        ; stop at the end of source
        mov eax, DWORD PTR[num]
        test eax, eax
        je terminate                        ; stop once num characters are copied
        sub eax, 1
        mov DWORD PTR[num], eax             ; num--
        mov eax, DWORD PTR[destination]
        mov BYTE PTR[eax], dl               ; *destination = *source
        add eax, 1
        mov DWORD PTR[destination], eax     ; destination++
        add ecx, 1
        mov DWORD PTR[source], ecx          ; source++
        jmp copy_loop
    terminate:
        mov eax, DWORD PTR[destination]
        mov BYTE PTR[eax], 0                ; null terminate destination string
    }

    // destination is returned by standard strncpy()
    return ptr;
/*
// return if no memory is allocated to the destination
if (destination == NULL)
return NULL;
// take a pointer pointing to the beginning of destination string
char* ptr = destination;
// copy first num characters of C-string pointed by source
// into the array pointed by destination
while (*source && num--)
{
*destination = *source;
destination++;
source++;
}
// null terminate destination string
*destination = '\0';
// destination is returned by standard strncpy()
return ptr;
*/
}
// Implement strncpy() function in C
// Driver: copies up to num characters of a sample string into a local buffer
// and prints the pointer returned by strncpy().
int main(void)
{
    /*
    char* source = "Crisp Rat";
    char destination[12];
    */
    /*
    char* source = "Christians Later";
    char destination[12];
    */
    char destination[12];
    char* source = "Huge Ackman";
    size_t num = 11;
    char* copied;

    // Copies the first num characters of source to destination
    copied = strncpy(destination, source, num);
    printf("%s\n", copied);
    return 0;
}
I've stepped through the code and the error occurs at the instruction "mov byte ptr[eax],dl".
My C code equivalent of the asm code is commented out just below it. I appreciate any help very much, thank you.
I wrote this classic function : (in 32-bit mode)
// Exchanges the values of its two parameters using a temporary.
// a and b are this function's own copies of the arguments.
void ex(size_t a, size_t b)
{
size_t c;
c = a;
a = b;
b = c;
}
I call it inside the main as follows :
size_t a = 4;
size_t b = 5;
ex(a,b);
What I was expecting from the assembly code generated when entering the function is something like this :
1-Push the values of b and a in the stack : (which was done)
mov eax,dword ptr [b]
push eax
mov ecx,dword ptr [a]
push ecx
2-Use the values of a and b in the stack :
push ebp
mov ebp, esp
sub esp, 4
c = a;
mov eax, dword ptr [ebp+8]
mov dword ptr [ebp-4], eax
and so on for the other variables.
However, this is what I find when debugging :
push ebp
mov ebp,esp
sub esp,0CCh // normal since it's in debug with ZI option
push ebx
push esi
push edi
lea edi,[ebp-0CCh]
mov ecx,33h
mov eax,0CCCCCCCCh
rep stos dword ptr es:[edi]
size_t c;
c = a;
mov eax,dword ptr [a]
mov dword ptr [c],eax
Why is it using the variable a directly instead of calling the value stored in the stack? I don't understand...
The debugger shows the symbol name `a` in place of the ebp-relative operand, so you don't see the instruction using ebp to access it. The same syntax is permitted when you write inline assembly, which is also the reason that `dword ptr` still appears.
It is easy to get it your preferred way, right click > untick "Show Symbol Names".
Using the assembly output option (right click on file name, properties, ...), I get what you expect from debug assembly output. This could depend on which version of VS you use. For this example, I used VS2005. I have VS2015 on a different system, but didn't try it yet.
; ebp-relative offsets of the local and the two parameters used below
_c$ = -8 ; size = 4
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd ;fill with 0cccccccch
mov eax, DWORD PTR _a$[ebp] ; c = a   (_a$ = 8, i.e. [ebp+8])
mov DWORD PTR _c$[ebp], eax
mov eax, DWORD PTR _b$[ebp] ; a = b   (_b$ = 12, i.e. [ebp+12])
mov DWORD PTR _a$[ebp], eax
mov eax, DWORD PTR _c$[ebp] ; b = c   (_c$ = -8, i.e. [ebp-8])
mov DWORD PTR _b$[ebp], eax
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
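Note that this function does not actually swap anything for the caller: a and b are passed by value, so only the function's own copies change. You need to use pointers for the swap to work.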
// Swaps the two size_t objects that pa and pb point to, using a temporary.
void ex(size_t *pa, size_t *pb)
{
size_t c;
c = *pa;
*pa = *pb;
*pb = c;
}
which gets translated into:
; ebp-relative offsets of the local and the two pointer parameters used below
_c$ = -8 ; size = 4
_pa$ = 8 ; size = 4
_pb$ = 12 ; size = 4
_ex PROC ; COMDAT
push ebp
mov ebp, esp
sub esp, 204 ; 000000ccH
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-204]
mov ecx, 51 ; 00000033H
mov eax, -858993460 ; ccccccccH
rep stosd
mov eax, DWORD PTR _pa$[ebp] ; c = *pa
mov ecx, DWORD PTR [eax]
mov DWORD PTR _c$[ebp], ecx
mov eax, DWORD PTR _pa$[ebp] ; *pa = *pb
mov ecx, DWORD PTR _pb$[ebp]
mov edx, DWORD PTR [ecx]
mov DWORD PTR [eax], edx
mov eax, DWORD PTR _pb$[ebp] ; *pb = c
mov ecx, DWORD PTR _c$[ebp]
mov DWORD PTR [eax], ecx
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
_ex ENDP
Say I have a struct defined as follows
// Minimal struct with a single int member, used for the increment examples.
struct my_struct
{
int num;
};
....
Here I have a pointer to my_struct and I want to do an increment on num
// Shows three candidate spellings for incrementing num through my_ptr.
void foo(struct my_struct* my_ptr)
{
// increment num
// method #1
my_ptr->num++;
// method #2
++(my_ptr->num);
// method #3
my_ptr->++num;
}
Do these 3 ways of incrementing num do the same thing?
While we're at it, is it true that pre-increment is more efficient than post-increment?
Thanks!
First two will have the same effect (when on a line on their own like that), but the third method isn't valid C code (you can't put the ++ there).
As for efficiency, there is no difference. The difference you may have heard people talking about is when, in C++, you increment a non-pointer data type, such as an iterator. In some cases, pre-increment can be faster there.
You can see the generated code using GCC Explorer.
// Post-increment form; the value of the expression is not used.
void foo(struct my_struct* my_ptr)
{
my_ptr->num++;
}
// Pre-increment form; the value of the expression is not used.
void bar(struct my_struct* my_ptr)
{
++(my_ptr->num);
}
Output:
foo(my_struct*): # #foo(my_struct*)
incl (%rdi)
ret
bar(my_struct*): # #bar(my_struct*)
incl (%rdi)
ret
As you can see, there's no difference whatsoever.
The only possible difference between the first two is when you use them in expressions:
my_ptr->num = 0;
int x = my_ptr->num++; // x = 0
my_ptr->num = 0;
int y = ++my_ptr->num; // y = 1
If your only intention is to increment the value of num, then the 1st and 2nd methods will yield the same intended result.
However, if you change your code to the following, you can see the difference between the code generated by gcc (assembly level code):
// Same single-member struct as before, repeated so the example is self-contained.
struct my_struct
{
int num;
};
// Here the value of the post-increment expression is used: it is passed to printf.
void foo(struct my_struct* my_ptr)
{
printf("\nPost Increment: %d", my_ptr->num++);
}
// Sets num to 10 and passes the struct's address to foo.
int main()
{
struct my_struct a;
a.num = 10;
foo(&a);
}
Now compile it using: gcc -masm=intel -S structTest.c -o structTest.s
This asks gcc to generate the assembly code:
Open structTest.s in a text editor.
foo:
.LFB0:
push rbp
mov rbp, rsp
sub rsp, 16
**mov QWORD PTR [rbp-8], rdi**
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax]
mov edx, eax
**lea ecx, [rax+1]**
mov rax, QWORD PTR [rbp-8]
mov DWORD PTR [rax], ecx
mov eax, OFFSET FLAT:.LC0
mov esi, edx
mov rdi, rax
mov eax, 0
call printf
leave
ret
.cfi_endproc
main:
.LFB1:
push rbp
mov rbp, rsp
sub rsp, 16
**mov DWORD PTR [rbp-16], 10
lea rax, [rbp-16]
mov rdi, rax
call foo**
leave
ret
.cfi_endproc
And when you change the operation to pre-increment, the following code is generated:
foo:
.LFB0:
.cfi_startproc
push rbp
mov rbp, rsp
sub rsp, 16
**mov QWORD PTR [rbp-8], rdi**
mov rax, QWORD PTR [rbp-8]
mov eax, DWORD PTR [rax]
**lea edx, [rax+1]**
mov rax, QWORD PTR [rbp-8]
**mov DWORD PTR [rax], edx**
mov rax, QWORD PTR [rbp-8]
**mov edx, DWORD PTR [rax]**
mov eax, OFFSET FLAT:.LC0
mov esi, edx
mov rdi, rax
mov eax, 0
call printf
leave
ret
.cfi_endproc
So, you would see that in the second case, the compiler increments the num value and passes on this num value to printf().
In terms of performance, I would expect the post-increment to be more efficient, since the memory locations are touched fewer times.
The important lines have been marked between ** in the above code.