I've been trying to translate a line of C code into assembly, but I just can't figure out the correct translation of member access through a pointer (the `->` operator) in asm.
Here is a fragment of the code:
struct file{
int size;
}FILE;
void function(FILE *result){
result -> size;
}
Assuming an x86-64 compiler,
_function:
; In:  rdi = pointer to struct file
; Out: eax = result->size (size is the first member, so it sits at offset 0;
;      writing eax also zeroes the upper half of rax)
mov eax, [rdi]
ret             ; return to the caller instead of falling through
Related
There is a code excerpt from official Quake 2 source code:
unsigned *buf;
dheader_t header;
...
/* #1: treat buf as a pointer to dheader_t and copy the pointed-to struct by value into header. */
header = *(dheader_t *)buf; // #1
/* #2: view header as an array of sizeof(dheader_t)/4 ints; pass each element
   through LittleLong and store the result back into the same slot. */
for (i=0 ; i<sizeof(dheader_t)/4 ; i++)
((int *)&header)[i] = LittleLong ( ((int *)&header)[i]); // #2
Can someone please explain to me, in as much detail as possible, what lines #1 and #2 really do? I'm rather confused...
P.S
Here is the rest of the definitions if it helps:
/* Forwards l to _LittleLong and returns whatever that call returns. */
int LittleLong (int l) {return _LittleLong(l);}
...
/* Header record: two ints followed by an array of HEADER_LUMPS lump_t entries
   (lump_t and HEADER_LUMPS are defined elsewhere). */
typedef struct
{
int ident;
int version;
lump_t lumps[HEADER_LUMPS];
} dheader_t;
P.S. 2
I've linked above the original full source file code if needed.
This is some seriously brittle code and you shouldn't write code like this.
What it does is go through the struct int by int and pass each such int through _LittleLong. Very likely this function performs a 32-bit conversion from a big-endian integer to a little-endian one, meaning that the source you are looking at is likely related to the reception of IP packets.
Checking at what the code does step by step:
for (i=0 ; i<sizeof(dheader_t)/4 ; i++) is a sloppier way of writing sizeof(dheader_t)/sizeof(int). That is: iterate through the struct int by int, chunks of 32 bits.
(int *)&header converts from a dheader_t* to a int*. This is actually well-defined by a special rule in C that allows us to convert from a pointer to a struct to a pointer to its first member or vice versa and the first member is int.
However, doing so is only well-defined for the first member. Instead they take the converted int* and apply array dereferencing on it: ((int *)&header)[i]. This is undefined behavior in C, a so-called strict aliasing violation, and could also cause alignment problems in some situations. Bad.
The int read from the struct through this dereferencing is then passed along to LittleLong which very likely does a big -> little endian conversion.
((int *)&header)[i] = and here it is written back to where it was grabbed from.
Better, safer, well-defined and possibly faster code could look like:
/* Rewrites *header in place, one 32-bit word at a time: each word is read in
   host byte order and its four bytes are stored back most-significant first. */
void endianify (dheader_t* header)
{
  /* The loop below steps in whole 32-bit words, so the struct size must be a
     multiple of that. */
  _Static_assert(sizeof(dheader_t)%sizeof(uint32_t)==0,
                 "Broken struct: dheader_t");

  unsigned char* bytes = (unsigned char*)header;

  for(size_t offset=0; offset<sizeof(dheader_t); offset+=sizeof(uint32_t))
  {
    unsigned char* word = bytes + offset;
    uint32_t value;

    /* memcpy reads the word without any aliasing or alignment assumptions. */
    memcpy(&value, word, sizeof value);

    word[3] = (unsigned char)( value        & 0xFF);
    word[2] = (unsigned char)((value >>  8) & 0xFF);
    word[1] = (unsigned char)((value >> 16) & 0xFF);
    word[0] = (unsigned char)((value >> 24) & 0xFF);
  }
}
Disassembly:
endianify:
mov eax, DWORD PTR [rdi]
bswap eax
mov DWORD PTR [rdi], eax
mov eax, DWORD PTR [rdi+4]
bswap eax
mov DWORD PTR [rdi+4], eax
mov eax, DWORD PTR [rdi+8]
bswap eax
mov DWORD PTR [rdi+8], eax
mov eax, DWORD PTR [rdi+12]
bswap eax
mov DWORD PTR [rdi+12], eax
mov eax, DWORD PTR [rdi+16]
bswap eax
mov DWORD PTR [rdi+16], eax
ret
Windows stores the TEB in FS (32bit) or GS (64bit) segment register. In a program using NtCurrentPeb() the x86 instruction is mov rax, gs:60h. The 0x60 value is offsetof(TEB, ProcessEnvironmentBlock).
To use this in a program I have to include both the Windows.h and Winternl.h header files, which bring in a bunch of other #defines. As the question says, I want to use the function without these header files, by directly accessing the segment register. I've also made a separate header file with the TEB and PEB structures. So how can I do that? I was thinking of using the __asm keyword and a typedef for NtCurrentTeb(), or something similar.
I really do not understand why you answered your own question incompletely. This confuses further readers because you did not provide the appropriate answer to the question itself.
You do not need to use ASM for this, you can use intrinsic functions like so:
// Select the PEB slot for the target architecture and read it with the
// matching segment-register intrinsic.
#if defined(_M_X64)
auto pPEB = (PPEB)__readgsqword(0x60);   // 64-bit: GS:[0x60]
#elif defined(_M_IX86)
auto pPEB = (PPEB)__readfsdword(0x30);   // 32-bit: FS:[0x30]
#else
#error "PPEB Architecture Unsupported"
#endif
But to answer the actual question, here is how to do it via ASM:
x64 ASM (TEB/PEB):
; GetTEBAsm64 (x64 MASM)
; Out: rax = qword read from gs:[30h] (the TEB pointer)
; Touches no other register and no stack.
GetTEBAsm64 PROC
        mov     rax, gs:[30h]
        ret
GetTEBAsm64 ENDP
; GetPEBAsm64 (x64 MASM)
; Out: rax = qword read from gs:[60h] (the PEB pointer)
; Touches no other register and no stack.
GetPEBAsm64 PROC
        mov     rax, gs:[60h]
        ret
GetPEBAsm64 ENDP
x86 - PEB:
// 32-bit only: load the dword at fs:[30h] and store it in the variable peb.
__asm
{
    mov eax, fs:[30h]
    mov peb, eax
}
x86 - TEB:
__asm
{
mov eax, dword ptr fs : [00000018h]
mov teb, eax
}
I strongly hope that my answer is clear and that someone else in the future can benefit from it.
Declare function prototype and link against ntdll.dll.
To read from the gs or fs segment register, I used this assembly in Visual Studio. Create an empty C/C++ project in Visual Studio with these settings enabled. The fs or gs segment register gives access to the NT_TIB structure in 32-bit and 64-bit Windows respectively. The TEB pointer is at offset 0x30 in the NT_TIB structure, so the assembly in 64-bit will be: mov rax, gs:[30h].
Here is a sample source code to get current directory of an executable file:
ProcParam.asm:
; ProcParam.asm (x64 MASM, 64-bit only)
; C prototype: PRTL_USER_PROCESS_PARAMETERS ProcParam(void);
; Follows gs:[30h] -> TEB, TEB+60h -> PEB, PEB+20h -> RTL_USER_PROCESS_PARAMETERS
; and returns that last pointer in rax. Only rax is written.

TEB_SELF_OFFSET         EQU 30h     ; gs-relative slot holding the TEB pointer
TEB_PEB_OFFSET          EQU 60h     ; PEB pointer inside the TEB
PEB_PROCPARAMS_OFFSET   EQU 20h     ; process-parameters pointer inside the PEB

.code

ProcParam PROC
        mov     rax, gs:[TEB_SELF_OFFSET]               ; rax = TEB
        mov     rax, [rax + TEB_PEB_OFFSET]             ; rax = PEB
        mov     rax, [rax + PEB_PROCPARAMS_OFFSET]      ; rax = RTL_USER_PROCESS_PARAMETERS
        ret
ProcParam ENDP

END
main.c:
#include <stdio.h>
/* Counted wide-character string: two 16-bit size fields plus a pointer to the
   characters. NOTE(review): Length/MaximumLength are presumably byte counts,
   as in the Windows UNICODE_STRING definition — confirm. */
typedef struct _UNICODE_STRING {
unsigned short Length;
unsigned short MaximumLength;
wchar_t* Buffer;
} UNICODE_STRING, *PUNICODE_STRING;
/* Current-directory record: the path as a UNICODE_STRING plus an opaque handle. */
typedef struct _CURDIR {
UNICODE_STRING DosPath;
void* Handle;
} CURDIR, *PCURDIR;
/*Extracted from ntdll.pdb file*/
/* Only the leading fields up to CurrentDirectory are declared; the field order
   and types must match the real layout because the struct is read through a
   pointer returned by ProcParam(). */
typedef struct _RTL_USER_PROCESS_PARAMETERS {
unsigned int MaximumLength;
unsigned int Length;
unsigned int Flags;
unsigned int DebugFlags;
void* ConsoleHandle;
unsigned int ConsoleFlags;
void* StandardInput;
void* StandardOutput;
void* StandardError;
CURDIR CurrentDirectory;
/*Many more*/
} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS;
PRTL_USER_PROCESS_PARAMETERS ProcParam(void);
int main(void)
{
wprintf(L"%s\n", ProcParam()->CurrentDirectory.DosPath.Buffer);
}
In a C method, given the only local variable int i (uninitialized), that i'd like to store in the register %ecx, and given the following struct located in %ebp+8:
/* Struct from the question; an instance `a` of it is located at %ebp+8. */
typedef struct {
char c;
/* NOTE(review): on typical 32-bit x86 ABIs int is 4-byte aligned, so 3 padding
   bytes presumably sit between c and k (k at offset 4, m at offset 8) —
   confirm for your compiler and packing settings. */
int k;
int *m;
} S1;
how do I translate into assembly (at&t syntax) the following code:
i=*(a.m);
i=i+a.k;
Thanks!
Given that i is int, in masm it's going to be something like:
; Layout of S1 with natural alignment (32-bit x86):
;   c at offset 0, 3 bytes of padding, k at offset 4, m at offset 8.
; The struct a starts at ebp+8, so a.k is at ebp+12 and a.m is at ebp+16.
; (Offsets 9 and 13 would only be right for a packed struct.)
;i = *(a.m);
mov eax, dword ptr [ebp+16] ; eax = a.m      (16 = 8 + offset of m)
mov ecx, dword ptr [eax] ; ecx = i = *(a.m)
;i = i + a.k;
add ecx, dword ptr [ebp+12] ; i += a.k       (12 = 8 + offset of k)
; The same sequence in AT&T syntax, as asked in the question:
;   movl 16(%ebp), %eax
;   movl (%eax), %ecx
;   addl 12(%ebp), %ecx
This question regards the difference between the volatile and extern variable and also the compiler optimization.
One extern variable defined in main file and used in one more source file, like this:
ExternTest.cpp:
// Global counter; Source1.cpp refers to this same object through an extern declaration.
short ExtGlobal;
// Defined in Source1.cpp; each call increments ExtGlobal by one.
void Fun();
// Sets ExtGlobal to 1000, then calls Fun() until ExtGlobal is no longer below 2000.
int _tmain(int argc, _TCHAR* argv[])
{
ExtGlobal=1000;
while (ExtGlobal < 2000)
{
Fun();
}
return 0;
}
Source1.cpp:
// Declaration only: the object itself is defined in ExternTest.cpp.
extern short ExtGlobal;
// Increments the shared global ExtGlobal by one.
void Fun()
{
ExtGlobal++;
}
The assembly generated for this in the vs2012 as below:
ExternTest.cpp assembly for accessing the external variable
ExtGlobal=1000;
013913EE mov eax,3E8h
013913F3 mov word ptr ds:[01398130h],ax
while (ExtGlobal < 2000)
013913F9 movsx eax,word ptr ds:[1398130h]
01391400 cmp eax,7D0h
01391405 jge wmain+3Eh (0139140Eh)
Source.cpp assembly for modifying the extern variable
ExtGlobal++;
0139145E mov ax,word ptr ds:[01398130h]
01391464 add ax,1
01391468 mov word ptr ds:[01398130h],ax
From the above assembly, every access to the variable "ExtGlobal" in the while loop reads the value from the corresponding address. If I add volatile to the external variable, the same assembly code is generated. So is using volatile for a variable shared between two threads the same as using an extern variable shared between two functions?
Asking about extern and volatile is like asking about peanuts and gorillas. They're completely unrelated.
extern is used simply to tell the compiler, "Hey, don't expect to find the definition of this symbol in this C file. Let the linker fix it up at the end."
volatile essentially tells the compiler, "Never trust the value of this variable. Even if you just stored a value from a register to that memory location, don't re-use the value in the register - make sure to re-read it from memory."
If you want to see that volatile causes different code to be generated, write a series of reads/writes from the variable.
For example, compiling this code in cygwin, with gcc -O1 -c,
/* File-scope int that foo() writes three times in a row. */
int i;
/* Stores 4, adds 2, subtracts 1: the net effect is i == 5. */
void foo() {
i = 4;
i += 2;
i -= 1;
}
generates the following assembly:
_foo proc near
mov dword ptr ds:_i, 5
retn
_foo endp
Note that the compiler knew what the result would be, so it just went ahead and optimized it.
Now, adding volatile to int i generates the following:
public _foo
_foo proc near
mov dword ptr ds:_i, 4
mov eax, dword ptr ds:_i
add eax, 2
mov dword ptr ds:_i, eax
mov eax, dword ptr ds:_i
sub eax, 1
mov dword ptr ds:_i, eax
retn
_foo endp
The compiler never trusts the value of i, and always re-loads it from memory.
I'm using IDA Pro to disassemble the following C code. However, looking at the disassembly below, it seems incomplete to me. The data is never initialized (as per the C code): although it does appear to be loaded onto the stack, the procedure (nullsub_1) located at 00401040 makes no use of it. Am I making a correct assessment, or am I missing something? I used Visual C++ 6/2005 to compile the C code.
#include <stdio.h>
#include <windows.h>
// Aggregate with a 10-char buffer, one BYTE and one int.
struct a
{
char s[10];
BYTE b;
int i;
};
a al;
void init(a);
// Passes the global al to init() by value, i.e. init() receives a copy.
void main()
{
init(al);
};
// Fills the fields of c. c is a by-value parameter, so these writes change only
// the local copy and are never observable by the caller.
void init(a c)
{
for(int j = 0; j < 10; j++) c.s[j] = 'A';
c.b = 10;
c.i = 10000;
};
.text:00401000 ; int __cdecl main(int argc,const char **argv,const char *envp)
.text:00401000 _main proc near ; CODE XREF: start+AFp
.text:00401000
.text:00401000 argc = dword ptr 4
.text:00401000 argv = dword ptr 8
.text:00401000 envp = dword ptr 0Ch
.text:00401000
.text:00401000 mov ecx, dword_4084C0
.text:00401006 mov edx, dword_4084C4
.text:0040100C sub esp, 10h
.text:0040100F mov eax, esp
.text:00401011 mov [eax], ecx
.text:00401013 mov ecx, dword_4084C8
.text:00401019 mov [eax+4], edx
.text:0040101C mov edx, dword_4084CC
.text:00401022 mov [eax+8], ecx
.text:00401025 mov [eax+0Ch], edx
.text:00401028 call nullsub_1
.text:0040102D add esp, 10h
.text:00401030 retn
.text:00401030 _main endp
.text:00401030
.text:00401030 ;
.text:00401031 align 10h
.text:00401040
.text:00401040
.text:00401040
.text:00401040 nullsub_1 proc near ; CODE XREF: _main+28p
.text:00401040 retn
.text:00401040 nullsub_1 endp
Your source code has no side effects other than just writing to memory. The compiler eliminates those writes as useless.
You may have better luck if you compile it in Debug mode (instead of Release) or turn off some compiler optimizations.
Alternatively, accesses to variables defined as volatile will be preserved, so you can add volatile in your code.