strcat overflow? - c

Could someone explain why is my strcat doing this?
I can't seem to find out why I am rewriting on a part of the source string.
The output is like this: New String: HelloThis shall be after my backslash 0 in str1h 0 in str1
global strcat
extern strlen
strcat:
push ebp
mov ebp, esp
push ecx
push esi
push edi
push ebx
push edx
xor edx, edx
xor edi, edi
xor ebx, ebx
xor esi, esi
xor ecx, ecx
mov edi, [ebp + 8]
mov esi, [ebp + 12]
push edi
call strlen
pop edi
mov ecx, eax
xor eax, eax
push esi
call strlen
pop esi
mov ebx, eax
xor eax, eax
cmp [edi + ecx], byte 0b
je PUT_LINE
jmp FINALIZE_END
PUT_LINE:
cmp ebx, eax
je END
mov dl, [esi + eax]
mov [edi + ecx], dl
xor edx, edx
inc eax
inc ecx
jmp PUT_LINE
END:
mov eax, [ebp + 8]
jmp FINALIZE_END
FINALIZE_END:
pop edx
pop ebx
pop edi
pop esi
pop ecx
mov esp, ebp
pop ebp
ret
~
~
int main(int argc, char** argv)
{
(void)argc;
(void)argv;
char* str1;
char* str2;
str1 = strdup("Hello");
str2 = strdup("This shall be after my backslash 0 in str1");
printf("New String : %s\n", strcat(str1, str2));
return (0);
}
~

strcat() appends the characters from one string to another string. The target string is modified. So strcat(str1, str2) modifies str1 to also contain the contents of str2.
Since not enough memory is allocated for str1 to contain the characters from both strings, this leads to an overflow.

Related

Assembly x86-32 and some c functions

I never learn C language so it makes me confuse. I just like to know if I did it correctly or where I need to improve. For this code I used assembly x86 32 bit. Thanks
This is what I supposed to do:
Write a procedure with the signature
char *strchar(char *s1, char c1)
that returns a pointer to the first occurrence of the character c1 within the string s1 or, if not found, returns a null.
This is what I came out with:
strchar (char*, char):
push ebp
mov ebp, esp
mov dword ptr [ebp-24], edi
mov EAX , esi
mov BYTE PTR [ebp-28], al
.L5:
mov EAX , dword ptr [ebp-24]
movzx EAX , byte ptr [ EAX ]
test AL, AL
je .L2
mov EAX , dword PTR [ebp-24]
movzx EAX , BYTE PTR [ EAX ]
cmp BYTE PTR [ebp-28], al
jne .L3
mov eax, dword PTR [ebp-24]
jmp .L6
.L3:
add dword PTR [ebp-24], 1
jmp .L5
.L2:
LEA eax, [ebp-9]
MOV DWORD PTR [EBP-8], eax
MOV EAX, DWORD PTR [ebp-8]
.L6:
POP EBP
RET
The lines:
mov dword ptr [ebp-24], edi
mov EAX , esi
mov BYTE PTR [ebp-28], al
assume that a stack frame has been allocated for this function which doesn’t appear true; I think you should have something like:
sub esp, 32
after the
mov ebp,esp
Also, the three lines after L2 seem confused. The only way to get to L2 is if the nil (0) byte is discovered in the string, at which point, the code should return a NULL pointer.
The exit path in the code (L6) leaves eax alone, so all that should be needed is:
L2:
mov eax, 0
It might make debugging easier if you kept the alias up to date; so:
L2:
mov eax, 0
mov [ebp-24], eax
Also, the calling convention used here is a bit odd: the string is passed in edi and the character in esi. Normally, in x86-32, these would both be passed on the stack. This looks like it might have been x86-64 code, converted to x86-32....
A final note; this assembly code looks like the output of a compiler with optimisations disabled. Often, generating the assembly with the optimisations enabled generates easier to understand code. This code, for example, could be much more concisely written as below, without even devolving into weird intel ops:
strchar:
mov edx, esi
mov eax, edi
L:
mov dh, [eax]
test dh, dh
jz null
cmp dh, dl
je done
inc eax
jmp L
null:
mov eax, 0
done:
ret
Here is one with stack overhead
[global strchar]
strchar:
push ebp
mov ebp, esp
mov dl, byte [ebp + 12]
mov ecx, dword [ebp + 8]
xor eax, eax
.loop: mov al, [ecx]
or al, al
jz .exit
cmp al, dl
jz .found
add ecx, 1
jmp .loop
.found: mov eax, ecx
.exit:
leave
ret
Here is one without stack overhead
[global strchar]
strchar:
mov dl, byte [esp + 8]
mov ecx, dword [esp + 4]
xor eax, eax
.loop: mov al, [ecx]
or al, al
jz .exit
cmp al, dl
jz .found
add ecx, 1
jmp .loop
.found: mov eax, ecx
.exit:
ret
These are using the 'cdecl' calling convention. For 'stdcall' change the last 'ret' to 'ret 8'.

sort the array and print the smallest number in inline assembly in visual studio

I write the following code to read some numbers ranging from -15 to 15 from the user and the user may define how many numbers to enter. Then I bubble sort the array to get the smallest number. (Bubble sort because I will need to print other information) However, the code is not working. Here is my code.
// oops.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
int _tmain(int argc, _TCHAR* argv[])
{
char message0[] = "How many numbers do you want to enter? \n";
char message1[] = "Enter the current reading: \n";
char message2[] = "Error!\n";
char message3[] = "The smallest number is: \n";
char format1[] = "%d";
char format2[] = "%s";;
int myarray[10000];
int No;
int counter;
int *p;
p = myarray - 1;
_asm{
lea eax, message0
push eax
call printf
add esp, 4
//read how many numbers the user would like to input
lea eax,counter
push eax
lea eax, format1
push eax
call scanf_s
add esp,8
mov No, 1
mov ecx, counter
mov ebx, 0
//read user's input
Input: push ecx
push No
lea eax, message1
push eax
call printf
add esp, 8
lea eax, myarray[ebx]
push eax
lea eax, format1
push eax
call scanf_s
add esp,8
//judge if the number is in the range of -15 to 15
JudgeInput: mov eax, myarray[ebx]
cmp eax,-15
jl Illegal
cmp eax,15
jle Legal
Illegal: lea eax,message2
push eax
call printf
add esp,4
pop ecx
jmp Input
Legal: add ebx,4
inc No
pop ecx
loop Input
//bubble sort
mov esi, p
mov ecx, counter
outer : mov edx, ecx
inner : cmp edx, ecx
jz exchangeNo
mov eax, [esi + ecx * 4]
mov ebx, [esi + edx * 4]
cmp eax, ebx
jnb exchangeNo
mov[esi + ecx * 4], ebx
mov[esi + edx * 4], eax
exchangeNo :
dec edx
jnz inner
loop outer
finish:
smallest: //print the smallest number
mov ebx,0
lea eax,message3
push eax
lea eax, format2
push eax
call printf
mov eax,0
lea ebx,myarray
sub ebx,4
add ebx,No
lea eax, [ebx]
push eax
lea eax,format1
call printf
add esp,16
}
return 0;
}
It would not return the smallest number. Sometimes it returns strange characters. I get really confusing. Additionally, when I enter negative numbers, the bubble sort seems not working well.
I have solved the problem. Here is my updated code:
int _tmain(int argc, _TCHAR* argv[])
{
char message0[] = "How many numbers do you want to enter? \n";
char message1[] = "Enter the current reading: \n";
char message2[] = "Error!\n";
char message3[] = "\nThe smallest number is: ";
char format1[] = "%d";
char format2[] = "%s";;
int myarray[10000];
int No;
int counter;
int *p;
p = myarray - 1;
_asm{
lea eax, message0
push eax
call printf
add esp, 4
lea eax,counter
push eax
lea eax, format1
push eax
call scanf_s
add esp,8
//get the user's input into the array
mov No, 1
mov ecx, counter
mov ebx, 0
Input:
push ecx
push No
lea eax, message1
push eax
call printf
add esp, 8
lea eax, myarray[ebx]
push eax
lea eax, format1
push eax
call scanf_s
add esp,8
//judge if the input is between -15 and 15
JudgeInput:
mov eax, myarray[ebx]
cmp eax,-15
jl Illegal
cmp eax,15
jle Legal
//if not, print out error message
Illegal:
lea eax,message2
push eax
call printf
add esp,4
pop ecx
jmp Input
//if yes, loop again
Legal:
add ebx,4
inc No
pop ecx
loop Input
//bubble sort
mov esi, p
mov ecx, counter
//the outer loop
outer : mov edx, ecx
//the inner loop
inner : cmp edx, ecx
je exchangeNo
mov eax, [esi + ecx * 4]
mov ebx, [esi + edx * 4]
cmp eax, ebx
jge exchangeNo
mov[esi + ecx * 4], ebx
mov[esi + edx * 4], eax
exchangeNo :
dec edx
jge inner
loop outer
finish:
//find out the smallest number
smallest :
lea eax, message3
push eax
lea eax, format2
push eax
call printf
lea ebx, myarray
mov eax, [ebx]
push eax
lea eax, format1
push eax
call printf
add esp, 16
}
}

x86 Struct scanf

I am trying to convert C to x86. I am using a struct...
struct person_record_struct
{
char last_name[128];
char first_name[128];
char year_of_birth[10];
int month_of_birth; // January => 1
int day_of_birth; // 1st Day of a Month => 1
char drivers_license_no[128];
};
typedef struct person_record_struct person_record;
I am having trouble getting my scanf to work. Here is the C..
result = scanf("%s\n%s\n%s\n%d\n%d\n%s\n", &records[counter].last_name[0],
&records[counter].first_name[0], &records[counter].year_of_birth[0],
&records[counter].month_of_birth, &records[counter].day_of_birth,
&records[counter].drivers_license_no[0]);
And my x86..
;counter # [ebp-4]
;records # [ebp-16]
; format_string_main_2 db '%s\n%s\n%s\n%d\n%d\n%s\n', 0
; read in info
; push drivers_license_no
mov ebx, [ebp-16] ;
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+276]
push eax
; push day_of_birth
mov ebx, [ebp-16]
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+272]
push eax
; push month_of_birth
mov ebx, [ebp-16]
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+268]
push ax
; push year_of_birth
mov ebx, [ebp-16]
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+256]
push eax
; push first_name
mov ebx, [ebp-16]
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+128]
push eax
; push last_name
mov ebx, [ebp-16]
mov eax, [ebp-4]
mov ecx, struct_size
mul ecx
add eax, ebx
lea eax, [eax+0]
push eax
push format_string_main_2
call scanf
add esp, 28
mov [ebp-12], eax
I'm using a check to see if result is 6 and if it's not my program that prints an error and exits. It keeps having an error and I'm not sure what I am doing wrong. Any help would be much appreciated. Thank you.
This is my calloc call which appears to be correct...
; // allocate the buffer of all the records
; records = (person_record *)calloc(number_of_records, sizeof(person_record));
push struct_size
mov eax, [ebp-8]
push eax
call calloc
add esp, 8
mov [ebp-16], eax
Under month_of_birth you have push ax instead of push eax. This would push only the lower 16 bits of the address on the stack, virtually guaranteeing a crash in scanf. Fix that and it should be OK.
There are many weird/wrong things going on in your code. It will be easier to show a cleaner way. You have not mentioned the Assembler you are using, there are a few for x86 and each has its own syntax. Here is how you can do it using NASM:
extern printf, scanf, calloc, exit, free, puts
global main
struc person_record
.last_name resb 128
.first_name resb 128
.year_of_birth resb 10
.month_of_birth resd 1
.day_of_birth resd 1
.drivers_license_no resb 128
.size equ $ - person_record
endstruc
MAX_RECORDS equ 2
section .data
Space db 32, 0
input_format db "%s%s%s%d%d%s", 0
output_format db "%s %s %s %d %d %s", 0
section .text
main:
push person_record.size
push MAX_RECORDS
call calloc
add esp, 4 * 2
mov esi, eax
mov ebx, eax
mov edi, MAX_RECORDS - 1
.FillRecord:
lea eax, [ebx + person_record.drivers_license_no]
push eax
lea ecx, [ebx + person_record.day_of_birth]
push ecx
lea edx, [ebx + person_record.month_of_birth]
push edx
lea eax, [ebx + person_record.year_of_birth]
push eax
lea ecx, [ebx + person_record.first_name]
push ecx
lea edx, [ebx + person_record.last_name]
push edx
push input_format
call scanf
add esp, 4 * 7
push Space
call puts
add esp, 4 * 1
add ebx, person_record.size
dec edi
jns .FillRecord
mov ebx, esi
mov edi, MAX_RECORDS - 1
.ShowRecord:
lea eax, [ebx + person_record.drivers_license_no]
push eax
mov ecx, [ebx + person_record.day_of_birth]
push ecx
mov edx, [ebx + person_record.month_of_birth]
push edx
lea eax, [ebx + person_record.year_of_birth]
push eax
lea ecx, [ebx + person_record.first_name]
push ecx
lea edx, [ebx + person_record.last_name]
push edx
push output_format
call printf
add esp, 4 * 7
push Space
call puts
add esp, 4 * 1
add ebx, person_record.size
dec edi
jns .ShowRecord
push esi
call free
add esp, 4 * 1
push 0
call exit
And the input and output of 2 records:

Assembly EAX register resetting without reason

I have the following assembly code:
; File: strrev.asm
; A subroutine called from C programs.
; Parameters: string A
; Result: String is reversed and returned.
SECTION .text
global strrev
_strrev: nop
strrev:
push ebp
mov ebp, esp
; registers ebx,esi, and edi must be saved if used
push ebx
push edi
xor esi, esi
xor eax, eax
mov ecx, [ebp+8] ; load the start of the array into ecx
jecxz end ; jump if [ecx] is zero
mov edi, ecx
reverseLoop:
cmp byte[edi], 0
je reverseLoop_1
inc edi
inc eax
jmp reverseLoop
reverseLoop_1:
mov esi, edi ;move end of array into esi
mov edi, ecx ;reset start of array to edi
reverseLoop_2:
mov al, [esi]
mov bl, [edi]
mov [esi], bl
mov [edi], al
inc edi
dec esi
dec eax
jnz reverseLoop_2
end:
pop edi ; restore registers
pop ebx
mov esp, ebp ; take down stack frame
pop ebp
ret
Which works fine until you start looping through reverseLoop_2. Using gdb, eax is listed as being 11, which it should be (this is the length of the string I passed in through a separate c program). This is show in the debugger as:
Breakpoint 2, reverseLoop_2 () at strrev.asm:40
40 mov al, [esi]
(gdb) display $eax
1: $eax = 11
However, if I step through the program to the next line, it resets to 0.
(gdb) next
41 mov bl, [edi]
1: $eax = 0
I need eax to be preserved since its the one keeping track of how many times reverseLoop_2 needs to loop. Why is it resetting to 0 after the call to mov?
If you're using eax as a loop counter, you shouldn't write to it inside the loop :
reverseLoop_2:
mov al, [esi]
Remember that al is the least significant byte of eax :
I think this should work.
mov eax, address of your string
push esi
push edi
mov edi, eax
mov esi, eax
; find end of string
sub ecx, ecx
not ecx
sub al, al
cld
repne scasb
; points to the byte after '0x00'
dec edi
dec edi
; main loop will swap the first with the last byte
; and increase/decrease the pointer until the cross each other
_loop:
cmp esi, edi ; if both pointers meet, we are done
jg _done
mov al, [edi]
mov bl, [esi]
mov [esi], al
mov [edi], bl
inc esi
dec edi
jmp _loop
_done:
pop edi
pop esi

declaring a string in assembly

I have this assembly code that computes some prime numbers:
#include <stdio.h>
int main() {
char format[] = "%d\t";
_asm{
mov ebx, 1000
mov ecx, 1
jmp start_while1
incrementare1:
add ecx, 1
start_while1:
cmp ecx, ebx
jge end_while1
mov edi, 2
mov esi, 0
jmp start_while2
incrementare2:
add edi, 1
start_while2:
cmp edi, ecx
jge end_while2
mov eax, ecx
xor edx, edx
div edi
test edx, edx
jnz incrementare2
mov esi, 1
end_while2:
test esi, esi
jnz incrementare1
push ecx
lea ecx, format
push ecx
call printf
pop ecx
pop ecx
jmp incrementare1
end_while1:
nop
}
return 0;
}
It works fine but I would like to also declare the 'format' string in asm, not in C code. I have tried adding something like format db "%d\t", 0 but it didn't work.
If all else fails there's always the ugly way:
format_minus_1:
mov ecx,0x00096425 ; '%', 'd', '\t', '\0' in little-endian format
lea ecx,format_minus_1 + 1 ; skip past the "mov ecx" opcode
push ecx
call printf
You cannot define objects inside the _asm block with those directives. The C declaration is allocating space on the stack for you so if you want to do something like that inside the _asm block you need to manipulate the stack pointer and initialize the memory yourself:
sub esp, 4
mov [esp], '%'
mov [esp + 1], 'd'
mov [esp + 2], '\t'
mov [esp + 3], '\0'
...
push ecx
push esp + 4
call printf
Note this is one way. Not necessarily the best way. The best way being let C do your memory management for you.

Resources