Interaction with array results in a segfault - arrays

I'm trying to implement a simple addition calculator, but I'm not able to store the input in my array. I'm trying to read char by char because I want to use it later to implement a backend for my B compiler (which has the getchar function that reads char by char from stdin). My code is the fallowing:
segment .data
numb db 0, 0, 0, 0
indx db 0
char db '0'
newl db 0ah
msg1 db 'enter a number: '
len1 equ $ - msg1
segment .text
global _start ; defines the entry point
print: ; push msg; push len
pop eax ; removes caller address from stack
pop edx ; gets length
pop ecx ; gets msg
push eax ; pushes CA to stack again
mov ebx , 01h ; tells that it's an output call
mov eax , 04h ; system call (write)
int 80h ; calls it
ret
getc: ; push add; push len
pop eax ; removes caller address from stack
pop ecx ; gets ouput addrress
push eax ; pushes CA to stack again
mov edx , 01h
mov ebx , 00h ; tells that it's an input call
mov eax , 03h ; system call (read)
int 80h ; calls it
ret
exit:
mov ebx , 0 ; sets exit code
mov eax , 01h ; system call (exit)
int 80h ; calls it
_start:
push msg1
push len1
call print
read:
push char
call getc
mov eax , numb
add eax , indx
mov [eax], dword char
inc byte [indx]
mov eax , char
cmp eax , newl
jne read
jmp exit ; exits program
for now I'm just trying to store the input, because I got segfaults from the complete code, so I started stripping off code until I found the error cause.

You probably don't want to insert the newline in the array, so start with checking for the newline:
read:
push char
call getc
mov al, [char]
cmp al, 10
je done
Then load the byte-sized index in an address register, remembering that AL already contains the datum, so pick another register than EAX. Also, instead of adding the array address numb and the index indx yourself, let the CPU do that for your with an addressing mode that has a displacement component ([numb + ebx]):
movzx ebx, byte [indx]
mov [numb + ebx], al
inc byte [indx]
jmp read
done:
jmp exit
There's also the possibility to define the index indx as a dword with indx dd 0. Then the code becomes:
read:
push char
call getc
mov al, [char]
cmp al, 10
je done
mov ebx, [indx]
mov [numb + ebx], al
inc dword [indx]
jmp read
done:
jmp exit
The lesson here is that NASM is different from MASM in how you address memory:
MASM
mov eax, offset MyVar ; Load address of MyVar
mov eax, MyVar ; Load value stored in MyVar
NASM
mov eax, MyVar ; Load address of MyVar
mov eax, [MyVar] ; Load value stored in MyVar

Related

Can't find the reason for a build error on Assembly language program which converts a string Array into an integer array

I am creating a program which reads a list of integers seperated by a single space via console and printing the sum of all the integers. The main problem is extracting the integers from the string array into a signed integer array.
Some examples of input are "-20 30 5" (each integer is seperated by a single space) or " [space]-20 30 5 [space]" (there may be spaces between the beginning and the end of the list, but the numbers are still seperated by a single space)
Also, after printing the sum, the program returns to reading another input unless only the enter key is typed.
After writing the code and pressing the Debug button, I am getting these two following build errors:
A2005 symbol redefinition: InBuffer
A2111 conflicting parameter definition
I've checked the error messages and apparently both of them are related to the PROTO and PROC directives. But there seems to be no problems regarding the parameter definition.
Here is my code.
INCLUDE Irvine32.inc
ArrayGet PROTO, ; convert string array into int array
inBuffer: PTR BYTE,
inBufferN: DWORD,
intArray: PTR SDWORD
.data
BUF_SIZE EQU 256
inBuffer BYTE BUF_SIZE DUP(?) ; input buffer
inBufferN DWORD ? ; length of input
intArray SDWORD BUF_SIZE/2 DUP(?) ; integer array for storing converted string
intArrayN DWORD ? ; number of integers
prompt BYTE "Enter numbers(<ent> to exit) : ", 0
bye BYTE "Bye!", 0
.code
main PROC
L1:
mov esi, 0
mov edx, OFFSET prompt
call WriteString
mov edx, OFFSET inBuffer
mov ecx, BUF_SIZE
call ReadString
cmp inBuffer[0], 0ah
je L3 ; only typing <ent> ends the program
mov inBufferN, eax
mov ecx, inBufferN
SpaceCheck: ; calls procedure when it finds a number
cmp inBuffer[esi], 20h
jne L2
inc esi
loop SpaceCheck
jmp L1
L2:
INVOKE ArrayGet, ADDR inBuffer, inBufferN, ADDR intArray ; put inBuffer offset on edx, inBufferN on ecx
mov intArrayN, eax
mov ecx, intArrayN
mov eax, 0
mov esi, OFFSET intArray
Ladd: ; adding the integer array
add eax, [esi]
inc esi
loop Ladd
call WriteInt
call CRLF
jmp L1
L3:
mov edx, OFFSET bye
call WriteString
exit
main ENDP
; procedure definition
ArrayGet PROC USES edx ecx,
inBuffer : PTR BYTE,
inBufferN: DWORD,
intArray: PTR SDWORD
LOCAL ArrayNum: DWORD
mov ArrayNum, 0
mov ecx, inBufferN
sub ecx, esi ; ecx(loop count) from first char to the end
LOOP1:
lea edx, inBuffer
add edx, esi ; edx points the offset of first char
mov edi, esi ; save location of first char
LOOP2: ; check spaces between integers
cmp inBuffer[esi], 20h
je getNum
inc esi
loop LOOP2
jmp getNum ; jump to getNum if array ends with a number
getNum: ; converting char into int
push ecx
inc esi
cmp inBuffer[esi], 20h ; two spaces in a row is considered as no more numbers afterwards
je EndBuffer
dec esi
mov ecx, esi
sub ecx, edi ; length of single number in char
call ParseInteger32
mov edi, ArrayNum
mov intArray[edi], eax
inc ArrayNum
inc esi
pop ecx
loop LOOP1
jmp EndBuffer ; end procedure when loop is over
EndBuffer:
mov eax, ArrayNum
inc eax
ret
ArrayGet ENDP
END main
In case you have questions about my intentions in the code or about the form of the input, feel free to leave it at the comment section

Printing array as double word in assembly

%include "asm_io.inc"
;
; initialized data is put in the .data segment
;
segment .data
array: dd 180,32,455,499,388,480,239,346,257,84
fmt: dd ",%d",0
; uninitialized data is put in the .bss segment
;
segment .bss
resd 10
;
; code is put in the .text segment
;
segment .text
extern printf
global asm_main
asm_main:
enter 0,0 ; setup routine
pusha
; The following is just example of how to print an array
push dword 10
push dword array
call print_array
add esp,8 ; clean up stack
; don't delete anything following this comment
popa
mov eax, 0 ; return back to C
leave
ret
segment .data
ListFormat db ",%u", 0
segment .text
global print_array
print_array:
enter 0,0
push esi
push ebx
xor esi, esi ; esi = 0
mov ecx, [ebp+12] ; ecx = n
mov ebx, [ebp+8]
xor edx, edx
mov dl, [ebx + esi] ; ebx = address of array
mov eax,edx
call print_int
dec ecx
inc esi
print_loop:
xor edx,edx
mov dl,[ebx + esi]
push ecx ; printf might change ecx!
push edx ; push array value
push dword ListFormat
call printf
add esp, 8 ; remove parameters (leave ecx!)
inc esi
pop ecx
loop print_loop
call print_nl
pop ebx
pop esi
leave
ret
So this code prints out 180,0,0,0,32,0,0,0,199,1 when I want to print out 180,32,455,499,388,480,239,346,257,84. I think that it's because this is designed to print byte words. I'm trying to print in double words and I'm guessing something in the print_array needs to be changed. I tried mov dl, [ebx+esi*4] but it still doesn't print the array that I want to print. Or does something else needs to be changed to print array of double words?
You could leave it at changing the mov dl, [ebx+esi] instruction into mov edx, [ebx+esi*4], but that would be just half the fun!
1 Why not try to make a loop that can deal with the special case of the first value in the list that doesn't need the comma prefix? No more using print_int.
2 Also don't use the LOOP instruction. It's slow! The pair cmp jb (that can macro-fuse) is much better.
3 And replacing the prolog enter 0,0 and epilog leave codes by simply addressing the parameters via ESP relative addressing is simple enough.
4 Always consider the special cases! What if the array happens to be empty?
print_array:
push ebx
push esi
mov ebx, [esp+12] ; Begin array
mov esi, [esp+16] ; n
test esi, esi
jz done
lea esi, [ebx+esi*4] ; End array
mov edx, ListFormat+1 ; "%u"
more: mov eax, [ebx] ; Array dword value
push eax
push edx ; "%u" first time, ",%u" others
call printf
add esp, 8
add ebx, 4 ; To next dword in the array
mov edx, ListFormat ; ",%u"
cmp ebx, esi ; Current address < Last address ?
jb more ; Yes
call print_nl
done: pop esi
pop ebx
ret
Under the right conditions, keeping ESP fixed inside this loop can be worth doing. See Peter Cordes' comments below this answer.
Next is a version of this code that keeps ESP fixed inside the loop:
print_array:
push ebx
push esi
mov ebx, [esp+12] ; Begin array
mov esi, [esp+16] ; n
test esi, esi
jz done
sub esp, 8 ; Space for the printf args
lea esi, [ebx+esi*4] ; End array
mov edx, ListFormat+1 ; "%u"
more: mov eax, [ebx] ; Array dword value
mov [esp+4], eax
mov [esp], edx ; "%u" first time, ",%u" others
call printf
add ebx, 4 ; To next dword in the array
mov edx, ListFormat ; ",%u"
cmp ebx, esi ; Current address < Last address ?
jb more ; Yes
call print_nl
add esp, 8
done: pop esi
pop ebx
ret

Segmentation fault Core Dumped X86 Assembly Array

I asked for help earlier and thought I was home free but I'm not. My logic was wrong and I've greatly altered it. This program is supposed to return the Max int in the array (which also happens to be the last element). After tracing it with GDB a few times, I see that I get to the 5th (2nd to last) element in the array, "20", when I hit a segmentation fault and the program halts. I set ECX to the array length and subtracted 1 from ECX to avoid this, but my logic is obviously wrong. Am I wrong in depending on the ECX counter to terminate the loop. Any ideas please?
***EDIT MAJOR EDITS TO CODE
SECTION .data ;data section
msg1: db "Here are the array elements:", 10, 0
msg1Len: equ $-msg1
msg2: db "Here is the sorted array:", 10, 0
msg2Len: equ $-msg2
arr: dd 12, 16, 6, 18, 10, 40, 30
arrLen: equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global bsort
bsort:
push ebp ; save old ebp register
mov ebp, esp ; build a new stack
restart:
mov ebx, arr ; the base address argument is saved in ebx register
mov ecx, arrLen ; the size argument is saved in exc register
sub ecx, 1 ; Last member has no following element to compare with.
; So we need to reduce the counter by 1
top:
mov eax, [ebx] ;; access first array element. Move its value to eax
cmp eax, [ebx+4] ; compare the value of eax ([ebx]) with [ebx+4]
jle noswap ; if value at eax is less or equal to value of [ebx+4]
; no need to exchang values. Jump to noswap
xchg eax, [ebx+4] ; if value at eax > value [ebx+4], exchange
mov [ebx], eax ; store the new exchanged value at [ebx]
jmp restart ; reset the base address and counter. Start over
noswap:
add ebx, 4 ; move to the next array element
loop top ; loop back to the top if the register ecx > 0
leave
ret
global main
main:
push ebp
mov ebp, esp
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save sizein in ecx
mov ebx, arr
mov ecx, arrLen; store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
loop PrintArray
;call bubblesort
call bsort
mov ecx, msg2
mov edx, msg2Len
call PString
;save arr base add in sbx and size in ecx
mov ebx, arr
mov ecx, arrLen
PrintSortedArray:
mov eax, [ebx]
call PrintDec
call Println
add ebx, 4
loop PrintSortedArray
;exit program and clean stack
mov esp, ebp
pop ebp
ret
PString:; save register values of the called function
pusha
mov eax,4 ; use 'write' system call = 4
mov ebx,1 ; file descriptor 1 = STDOUT
int 80h ; call the kernel
; restore the old register values of the called function
popa
ret
Println:
;will call PString func
;will change content of ecx and edx
;need to save registers used by the main program
section .data
nl db 10
section .text
pusha
mov ecx, nl
mov edx, 1
call PString
;return original register values
popa
ret
PrintDec:
;saves all registers so they return unmodified
;build the function to handle dword size
section .bss
decstr resb 10 ; 10 32-bit digits
ct1 resd 1 ;keep track of dec-string size
section .text
pusha; save registers
mov dword[ct1],0 ;initially assume 0
mov edi, decstr ; edi points to dec-string
add edi, 9 ; moved to the last element of string
xor edx, edx ; clear edx for 64-bit div
whileNotZero:
mov ebx, 10 ; get ready to divide by 10
div ebx ; divide by 10
add edx, '0' ; convert to ascii
mov byte[edi], dl ; put it in string
dec edi ; move to next char in str
inc dword[ct1] ; inc char counter
xor edx, edx ; clear edx
cmp eax, 0 ;is remainder 0?
jne whileNotZero ;if no, keep on looping
inc edi ; conversion finished, bring edi
mov ecx, edi ; back to start of string. make ecx
mov edx, [ct1] ; point to counterm edx gets # chars
mov eax, 4 ; print to stdout
mov ebx, 1
int 0x80 ; call kernel
popa ; restore registers
ret

X86 Assembly Printing Max in Array returns Segmentation fault (core dumped)

My program works, but there is something wrong with my printMax function. The program terminates with a
Segmentation fault (core dumped).
I have tried building a stack for the function and just doing a pusha popa and both ways, I get the seg fault core dumped.
I've tried calling the function, but it just runs twice.
Any idea what I am doing wrong?
SECTION .data ;data section
msg1 : db "Here are the array elements:", 10, 0
msg1Len: equ $-msg1
msg2 : db "Here is the max value in the array:", 10, 0
msg2Len: equ $-msg2
arr : dd 2,4,6,8,10,20,40
arrLen : equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global main
main:
push ebp
mov ebp, esp
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save sizein in ecx
mov ebx, arr
mov ecx, arrLen; store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
loop PrintArray
printMax:
section .text
pusha
;reset array to find max
mov ebx, arr
mov ecx, arrLen
loopForMax:
mov eax, [ebx]
cmp eax, [ebx +4]
jle sameMax
mov [max], eax
sameMax:
add ebx, 4 ;move to next element
loop loopForMax
mov ecx, msg2
mov edx, msg2Len
call PString
mov eax, [max]
call PrintDec
call Println
popa
ret
;exit program and clean stack
mov esp, ebp
pop ebp
ret
PString:; save register values of the called function
pusha
mov eax,4 ; use 'write' system call = 4
mov ebx,1 ; file descriptor 1 = STDOUT
int 80h ; call the kernel
; restore the old register values of the called function
popa
ret
Println:
;will call PString func
;will change content of ecx and edx
;need to save registers used by the main program
section .data
nl db 10
section .text
pusha
mov ecx, nl
mov edx, 1
call PString
;return original register values
popa
ret
PrintDec:
;saves all registers so they return unmodified
;build the function to handle dword size
section .bss
decstr resb 10 ; 10 32-bit digits
ct1 resd 1 ;keep track of dec-string size
section .text
pusha; save registers
mov dword[ct1],0 ;initially assume 0
mov edi, decstr ; edi points to dec-string
add edi, 9 ; moved to the last element of string
xor edx, edx ; clear edx for 64-bit div
whileNotZero:
mov ebx, 10 ; get ready to divide by 10
div ebx ; divide by 10
add edx, '0' ; convert to ascii
mov byte[edi], dl ; put it in string
dec edi ; move to next char in str
inc dword[ct1] ; inc char counter
xor edx, edx ; clear edx
cmp eax, 0 ;is remainder 0?
jne whileNotZero ;if no, keep on looping
inc edi ; conversion finished, bring edi
mov ecx, edi ; back to start of string. make ecx
mov edx, [ct1] ; point to counterm edx gets # chars
mov eax, 4 ; print to stdout
mov ebx, 1
int 0x80 ; call kernel
popa ; restore registers
ret

Calling C function in Assembly Segfaults

I am trying to write an assembly program that calls a function in c that will replace certain characters in a string with a predefined character given that the currently character in the char array meets some qualification.
My c file:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
// I ran my code through a debugger again, and it seems that when displaying
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
And my .asm file:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov esi, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
xor bl, bl ;bl will be our counter
firstLoop:
add bl, 1 ;inc bl would work too
add ecx, 1
mov eax, [ecx]
cmp eax, 0
jz end
push eax ; parameter for (*isinsubset)
;BREAK
call edx ; execute (*isinsubset)
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
ret
When running this through gdb and putting a breakpoint at ;BREAK, it segfaults after I take a step to the call command with the following error:
Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6
isdigit is part of the standard c library that i have included in my c file, so I am not sure what to make of this.
Edit: I have edited my firstLoop and included a secondLoop which should replace any digits with "#", however it seems to replace the entire array.
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
;cmp eax, 0
;jne end
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
Using gdb, when the code gets to secondloop, everything is correct. ecx is showing as "1" which is the first digit in the string that was passed in from the .c file. Esi is displaying as "#" as it should be. However, after I do mov [ecx], esi it seems to fall apart. ecx is displaying as "#" as it should at this point, but once I increment by 1 to get to the next character in the array, it is listed as "/000" with display. Every character after the 1 is replaced with "#" is listed as "/000" with display. Before I had the secondLoop trying to replace the characters with "#", I just had firstLoop looping with it self to see if it could make it through the entire array without crashing. It did, and after each increment ecx was displaying as the correct character. I am not sure why doing mov [ecx], esi would have set the rest of ecx to null.
In your firstLoop: you're loading characters from the string using:
mov eax, [ecx]
which is loading 4 bytes at a tie instead of a single byte. So the int that you're passing to isdigit() is likely to by far out of range for it to handle (it probably uses a simple table lookup).
You can load a single byte using the following Intel asm syntax:
movzx eax, byte ptr [ecx]
A few other things:
it will also have the effect that it probably wouldn't detect the end of the string properly since the null terminator might not be followed by three other zero bytes.
I'm not sure why you increment ecx before processing the first character in the string
the assembly code you posted doesn't appear to actually loop over the string
I've put some comments into your code:-
; this is OK: setting up the stack frame and saving important register
; on Win32, the registers that need saving are: esi, edi and ebx
; the rest can be used without needing to preserve them
push ebp
mov ebp, esp
push esi
push ebx
xor eax, eax
mov ecx, [ebp + 8]
; you said that this checked [ecx] for zero, but I think you've just written
; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
; the value at the address defined by reg
; so this is effectively doing a null pointer check (which is good)
jecxz end
mov esi, [ebp + 12]
mov edx, [ebp + 16]
xor bl, bl
firstLoop:
add bl, 1
; you increment ecx before loading the first character, this means
; that the function ignores the first character of the string
; and will therefore produce an incorrect result if the string
; starts with a character that needs replacing
add ecx, 1
; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
mov eax, [ecx]
cmp eax, 0
jz end
push eax
; possibly segfaults due to character out of range
; also, as mentioned elsewhere, the function you call here must conform to the
; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
; Win32 systems), so eax, ecx and edx can change, so next time you call
; [edx] it might be referencing random memory
; either save edx on the stack (push before pushing parameters, pop after add esp)
; or just load edx with [ebp+16] here instead of at the start
call edx
add esp, 4
mov ebx, eax
; more functionality required here!
end:
; restore important values, etc
pop ebx
pop esi
mov esp, ebp
pop ebp
; the result of the function should be in eax, but that's not set up properly yet
ret
Comments on your inner loop:-
firstLoop:
xor eax, eax
; you're loading a 32 bit value and checking for zero,
; strings are terminated with a null character, an 8 bit value,
; not a 32 bit value, so you're reading past the end of the string
; so this is unlikely to correctly test the end of string
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
; you need to keep ebp! its value must be saved (at the end,
; you do a mov esp,ebp)
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
; again, your accessing the string using a 32 bit value, not an 8 bit value
; so you're replacing the matched character and the three next characters
; with the new value
; the upper 24 bits are probably zero so the loop will terminate on the
; next character
; also, the function seems to be returning a count of characters replaced,
; but you're not recording the fact that characters have been replaced
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
You do seem to be having trouble with the way the memory works, you are getting confused between 8 bit and 32 bit memory access.

Resources