I am finishing up an assembly program that replaces characters in a string with a given replacement character. The assembly code calls C functions and the assembly program itself is called from main in my .c file. However, when trying to finish and return a final int value FROM the assembly program TO C, I get segfaults. My .asm file is as follows:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov al, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
push eax ; parameter for (*isinsubset)
mov edx, [ebp + 16]
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov eax, [ebp+12]
mov [ecx], al
mov edx, [ebp+16]
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
mov eax, 9
push eax ;test
ret
and my c file is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
In my assembly code, you can see in end
mov eax, 9
push eax
I am simply trying to return the value 9 to the value "r" which is an int in the C file. This is just a test to see if I can return an int back to r in the c file. Eventually I will be returning the number of characters that were replaced back to r. However, I need to figure out why the following code above is segfaulting. Any ideas?
mov eax, 9
push eax ; NOT a good idea
ret
That is a big mistake. It's going to return based on the lowest thing on the stack and you've just pushed something on to the stack that's almost certainly not a valid return address.
Most functions return a code by simply placing it into eax (this depends on calling convention of course but that's a pretty common one), there's generally no need to push it on to the stack, and certainly plenty of downside to doing so.
Return values are normally stored in EAX on X86 32 bit machines. So your pushing it on the stack after storing it in EAX is wrong, because the function it is returning to will try to use what is in EAX as a value for IP (instruction pointer)
Ret with no argument pops the return address off of the stack and jumps to it.
source
Related
I'm trying to implement a simple addition calculator, but I'm not able to store the input in my array. I'm trying to read char by char because I want to use it later to implement a backend for my B compiler (which has the getchar function that reads char by char from stdin). My code is the fallowing:
segment .data
numb db 0, 0, 0, 0
indx db 0
char db '0'
newl db 0ah
msg1 db 'enter a number: '
len1 equ $ - msg1
segment .text
global _start ; defines the entry point
print: ; push msg; push len
pop eax ; removes caller address from stack
pop edx ; gets length
pop ecx ; gets msg
push eax ; pushes CA to stack again
mov ebx , 01h ; tells that it's an output call
mov eax , 04h ; system call (write)
int 80h ; calls it
ret
getc: ; push add; push len
pop eax ; removes caller address from stack
pop ecx ; gets ouput addrress
push eax ; pushes CA to stack again
mov edx , 01h
mov ebx , 00h ; tells that it's an input call
mov eax , 03h ; system call (read)
int 80h ; calls it
ret
exit:
mov ebx , 0 ; sets exit code
mov eax , 01h ; system call (exit)
int 80h ; calls it
_start:
push msg1
push len1
call print
read:
push char
call getc
mov eax , numb
add eax , indx
mov [eax], dword char
inc byte [indx]
mov eax , char
cmp eax , newl
jne read
jmp exit ; exits program
for now I'm just trying to store the input, because I got segfaults from the complete code, so I started stripping off code until I found the error cause.
You probably don't want to insert the newline in the array, so start with checking for the newline:
read:
push char
call getc
mov al, [char]
cmp al, 10
je done
Then load the byte-sized index in an address register, remembering that AL already contains the datum, so pick another register than EAX. Also, instead of adding the array address numb and the index indx yourself, let the CPU do that for your with an addressing mode that has a displacement component ([numb + ebx]):
movzx ebx, byte [indx]
mov [numb + ebx], al
inc byte [indx]
jmp read
done:
jmp exit
There's also the possibility to define the index indx as a dword with indx dd 0. Then the code becomes:
read:
push char
call getc
mov al, [char]
cmp al, 10
je done
mov ebx, [indx]
mov [numb + ebx], al
inc dword [indx]
jmp read
done:
jmp exit
The lesson here is that NASM is different from MASM in how you address memory:
MASM
mov eax, offset MyVar ; Load address of MyVar
mov eax, MyVar ; Load value stored in MyVar
NASM
mov eax, MyVar ; Load address of MyVar
mov eax, [MyVar] ; Load value stored in MyVar
I am creating a program which reads a list of integers seperated by a single space via console and printing the sum of all the integers. The main problem is extracting the integers from the string array into a signed integer array.
Some examples of input are "-20 30 5" (each integer is seperated by a single space) or " [space]-20 30 5 [space]" (there may be spaces between the beginning and the end of the list, but the numbers are still seperated by a single space)
Also, after printing the sum, the program returns to reading another input unless only the enter key is typed.
After writing the code and pressing the Debug button, I am getting these two following build errors:
A2005 symbol redefinition: InBuffer
A2111 conflicting parameter definition
I've checked the error messages and apparently both of them are related to the PROTO and PROC directives. But there seems to be no problems regarding the parameter definition.
Here is my code.
INCLUDE Irvine32.inc
ArrayGet PROTO, ; convert string array into int array
inBuffer: PTR BYTE,
inBufferN: DWORD,
intArray: PTR SDWORD
.data
BUF_SIZE EQU 256
inBuffer BYTE BUF_SIZE DUP(?) ; input buffer
inBufferN DWORD ? ; length of input
intArray SDWORD BUF_SIZE/2 DUP(?) ; integer array for storing converted string
intArrayN DWORD ? ; number of integers
prompt BYTE "Enter numbers(<ent> to exit) : ", 0
bye BYTE "Bye!", 0
.code
main PROC
L1:
mov esi, 0
mov edx, OFFSET prompt
call WriteString
mov edx, OFFSET inBuffer
mov ecx, BUF_SIZE
call ReadString
cmp inBuffer[0], 0ah
je L3 ; only typing <ent> ends the program
mov inBufferN, eax
mov ecx, inBufferN
SpaceCheck: ; calls procedure when it finds a number
cmp inBuffer[esi], 20h
jne L2
inc esi
loop SpaceCheck
jmp L1
L2:
INVOKE ArrayGet, ADDR inBuffer, inBufferN, ADDR intArray ; put inBuffer offset on edx, inBufferN on ecx
mov intArrayN, eax
mov ecx, intArrayN
mov eax, 0
mov esi, OFFSET intArray
Ladd: ; adding the integer array
add eax, [esi]
inc esi
loop Ladd
call WriteInt
call CRLF
jmp L1
L3:
mov edx, OFFSET bye
call WriteString
exit
main ENDP
; procedure definition
ArrayGet PROC USES edx ecx,
inBuffer : PTR BYTE,
inBufferN: DWORD,
intArray: PTR SDWORD
LOCAL ArrayNum: DWORD
mov ArrayNum, 0
mov ecx, inBufferN
sub ecx, esi ; ecx(loop count) from first char to the end
LOOP1:
lea edx, inBuffer
add edx, esi ; edx points the offset of first char
mov edi, esi ; save location of first char
LOOP2: ; check spaces between integers
cmp inBuffer[esi], 20h
je getNum
inc esi
loop LOOP2
jmp getNum ; jump to getNum if array ends with a number
getNum: ; converting char into int
push ecx
inc esi
cmp inBuffer[esi], 20h ; two spaces in a row is considered as no more numbers afterwards
je EndBuffer
dec esi
mov ecx, esi
sub ecx, edi ; length of single number in char
call ParseInteger32
mov edi, ArrayNum
mov intArray[edi], eax
inc ArrayNum
inc esi
pop ecx
loop LOOP1
jmp EndBuffer ; end procedure when loop is over
EndBuffer:
mov eax, ArrayNum
inc eax
ret
ArrayGet ENDP
END main
In case you have questions about my intentions in the code or about the form of the input, feel free to leave it at the comment section
This is probably my final hurdle in learning x86 assembly language.
The following subroutine is giving me a segmentation fault:
;=================================================================
; RemCharCodeFromAToB - removes all chars between a and e from str
; arguments:
; str - string to be processed
; a - start
; e - end
; return value:
; n/a
;-------------------------------------------------------------------
RemCharCodeFromAToB:
; standard entry sequence
push ebp ; save the previous value of ebp for the benefi$
mov ebp, esp ; copy esp -> ebp so that ebp can be used as a $
; accessing arguments
; [ebp + 0] = old ebp stack frame
; [ebp + 4] = return address
mov edx, [ebp + 8] ; string address
while_loop_rcc:
mov cl, [edx] ; obtain the address of the 1st character of the string
cmp cl, 0 ; check the null value
je while_loop_exit_rcc ; exit if the null-character is reached
mov al, cl ; save cl
mov cl, [ebp + 16] ; end-char
push cx ; push end-char
mov cl, [ebp + 12] ; start-char
push cx ; push start-char
push ax; ; push ch
call IsBetweenAandB
add esp, 12
cmp eax, 0 ; if(ch is not between 'a' and 'e')
je inner_loop_exit_rcc
mov eax, edx ; copy the current address
inner_loop_rcc:
mov cl, [eax+1]
cmp cl, 0
je inner_loop_exit_rcc
mov [eax], cl
inc eax
jmp inner_loop_rcc
inner_loop_exit_rcc:
inc edx ; increment the address
jmp while_loop_rcc ; start the loop again
while_loop_exit_rcc:
; standard exit sequence
mov esp, ebp ; restore esp with ebp
pop ebp ; remove ebp from stack
ret ; return the value of temporary variable
;===================================================================
I am suspecting that there is something wrong with data conversions from 32-bit to 8-bit registers and vice-versa. My concept regarding this is not clear yet.
Or, is there something wrong in the following part
mov al, cl ; save cl
mov cl, [ebp + 16] ; end-char
push cx ; push end-char
mov cl, [ebp + 12] ; start-char
push cx ; push start-char
push ax; ; push ch
call IsBetweenAandB
add esp, 12
?
Full asm code is here.
C++ code is here.
Makefile is here.
cx and ax are 16-bit registers, so your push cx ; push cx; push ax are pushing 16-bit values on the stack, a total of 6 bytes. But IsBetweenAandB is apparently expecting 32-bit values, and you add 12 to esp at the end (instead of 6). So you probably wanted push ecx etc.
Also, you probably want to zero out eax and ecx before using them. As it stands, they probably contain garbage initially, and you only load useful data into the low 8 bits al and cl. Thus when IsBetweenAandB tries to compare the full 32-bit values, you are going to get false results. Or else you want to rewrite IsBetweenAandB to only compare the low bytes that you care about.
The following is a an x86 assembly language subroutine, written by me, meant to be called from a C program:
;================================================================
; IsBetweenAandB - tests a character if it is between 'a' and 'b'
; arguments:
; ch = character to be tested
; a = start character
; b = end character
; return value:
; 1 = yes
; 0 = no
;----------------------------------------------------------------
IsBetweenAandB:
push ebp
mov ebp, esp
add esp, 12 ;<------------------------------------------(1)
; [ebp] = old ebp stack frame
; [ebp + 4] = return address
mov eax, [ebp + 8] ; [ebp+8] = ch
push ebx ;<------------------------------------------(2)
mov ebx, [ebp + 12] ; 'a' = start
push edx ;<------------------------------------------(2)
mov edx, [ebp + 16] ; 'e' = end
cmp eax, ebx ; compare 'ch' with 'a'
jae next_test_ibab ; if(ch>='a')
jb set_zero_ibab ; if(ch<'a')
next_test_ibab:
cmp eax, edx ; compare 'ch' with 'e'
jbe set_one_ibab ; if(ch<='e')
ja set_zero_ibab ; if(ch>'e')
set_one_ibab:
mov eax, 1
jmp returns_ibab
set_zero_ibab:
mov eax, 0
returns_ibab:
pop edx ;<------------------------------------------(2)
pop ebx ;<------------------------------------------(2)
mov esp, ebp
pop ebp
ret
I have two questions here:
Since, I am not using any local variable here, do I need to write add esp, 12?
(a) Are these pushes and pops actually necessary at all (these are intended for restoring the old values of ebx and edx registers)?
(b) Would they create any disturbance in esp?
I am trying to write an assembly program that calls a function in c that will replace certain characters in a string with a predefined character given that the currently character in the char array meets some qualification.
My c file:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
// I ran my code through a debugger again, and it seems that when displaying
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
And my .asm file:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov esi, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
xor bl, bl ;bl will be our counter
firstLoop:
add bl, 1 ;inc bl would work too
add ecx, 1
mov eax, [ecx]
cmp eax, 0
jz end
push eax ; parameter for (*isinsubset)
;BREAK
call edx ; execute (*isinsubset)
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
ret
When running this through gdb and putting a breakpoint at ;BREAK, it segfaults after I take a step to the call command with the following error:
Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6
isdigit is part of the standard c library that i have included in my c file, so I am not sure what to make of this.
Edit: I have edited my firstLoop and included a secondLoop which should replace any digits with "#", however it seems to replace the entire array.
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
;cmp eax, 0
;jne end
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
Using gdb, when the code gets to secondloop, everything is correct. ecx is showing as "1" which is the first digit in the string that was passed in from the .c file. Esi is displaying as "#" as it should be. However, after I do mov [ecx], esi it seems to fall apart. ecx is displaying as "#" as it should at this point, but once I increment by 1 to get to the next character in the array, it is listed as "/000" with display. Every character after the 1 is replaced with "#" is listed as "/000" with display. Before I had the secondLoop trying to replace the characters with "#", I just had firstLoop looping with it self to see if it could make it through the entire array without crashing. It did, and after each increment ecx was displaying as the correct character. I am not sure why doing mov [ecx], esi would have set the rest of ecx to null.
In your firstLoop: you're loading characters from the string using:
mov eax, [ecx]
which is loading 4 bytes at a tie instead of a single byte. So the int that you're passing to isdigit() is likely to by far out of range for it to handle (it probably uses a simple table lookup).
You can load a single byte using the following Intel asm syntax:
movzx eax, byte ptr [ecx]
A few other things:
it will also have the effect that it probably wouldn't detect the end of the string properly since the null terminator might not be followed by three other zero bytes.
I'm not sure why you increment ecx before processing the first character in the string
the assembly code you posted doesn't appear to actually loop over the string
I've put some comments into your code:-
; this is OK: setting up the stack frame and saving important register
; on Win32, the registers that need saving are: esi, edi and ebx
; the rest can be used without needing to preserve them
push ebp
mov ebp, esp
push esi
push ebx
xor eax, eax
mov ecx, [ebp + 8]
; you said that this checked [ecx] for zero, but I think you've just written
; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
; the value at the address defined by reg
; so this is effectively doing a null pointer check (which is good)
jecxz end
mov esi, [ebp + 12]
mov edx, [ebp + 16]
xor bl, bl
firstLoop:
add bl, 1
; you increment ecx before loading the first character, this means
; that the function ignores the first character of the string
; and will therefore produce an incorrect result if the string
; starts with a character that needs replacing
add ecx, 1
; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
mov eax, [ecx]
cmp eax, 0
jz end
push eax
; possibly segfaults due to character out of range
; also, as mentioned elsewhere, the function you call here must conform to the
; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
; Win32 systems), so eax, ecx and edx can change, so next time you call
; [edx] it might be referencing random memory
; either save edx on the stack (push before pushing parameters, pop after add esp)
; or just load edx with [ebp+16] here instead of at the start
call edx
add esp, 4
mov ebx, eax
; more functionality required here!
end:
; restore important values, etc
pop ebx
pop esi
mov esp, ebp
pop ebp
; the result of the function should be in eax, but that's not set up properly yet
ret
Comments on your inner loop:-
firstLoop:
xor eax, eax
; you're loading a 32 bit value and checking for zero,
; strings are terminated with a null character, an 8 bit value,
; not a 32 bit value, so you're reading past the end of the string
; so this is unlikely to correctly test the end of string
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
; you need to keep ebp! its value must be saved (at the end,
; you do a mov esp,ebp)
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
; again, your accessing the string using a 32 bit value, not an 8 bit value
; so you're replacing the matched character and the three next characters
; with the new value
; the upper 24 bits are probably zero so the loop will terminate on the
; next character
; also, the function seems to be returning a count of characters replaced,
; but you're not recording the fact that characters have been replaced
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
You do seem to be having trouble with the way the memory works, you are getting confused between 8 bit and 32 bit memory access.