getting character from string and using it as array index... ASM

getting character from string and using it as array index... ASM - arrays

Having trouble using a string array and getting each character from it and adding a 1 to a frequency table of the corresponding ascii index (frequency table is indexed by ascii value): Example, get character 'a' then add 1 to the frequency table of index of the array ['a']. I was getting segmentation errors and now getting error: invalid combination of opcode and operands, talking about mov ax, al
Any questions about the parameters of the problem please ask. I have working on this for hours and could really use another pair of eyes to check what I am doing wrong (syntax/concept if you see one) Please help.
Update: I have got it print stuff out, so I think it is "working"; however I am now trying to print the characters that each array index corresponds. It won't print the character of the array that I am pointing to (it prints literally nothing for the character).
Latest update: I got it to work. changed some of the code under the label .loopa and now it works fine! :)
Code below:
SECTION .data ; Data section, initialized variables
array5: db "Hello, world...", 0
array5Len: equ $-array5-1
asoutput: db "%s", 0 ; string output
newline: db "", 10, 0 ; format for a new line
acoutput: db "%c: ", 0 ; output format for character output
SECTION .bss ; BSS, uninitialized variables
arrayq: resd 128 ; frequency array of the first 127 ascii values initialized to 0 (none have been counted yet)
SECTION .text
global main ; the standard gcc entry point
main: ; the program label for the entry point
push ebp ; set up stack frame
mov ebp,esp
mov esi, array5
mov edi, 0
mov ebx, arrayq
mov ecx, array5Len
; get each character of array5 and add 1 to the frequency table of the corresponding ascii value (which the arrayq is indexed by ascii value).
.loopf:
xor eax, eax
mov al, [esi]
;mov ax, [esi]
;mov ax, al
;mov cx, ax
add edi, eax
mov ebx, 1
add [arrayq+4*edi], ebx
mov edi, 0
add esi, 1
loop .loopf
push dword array2
push dword asoutput
call printf
add esp, 8
push dword newline
call printf
add esp, 4
;pop ebx
mov ebx, arrayq
mov ecx, 128 ; size of arrayq
mov esi, 0 ;start at beginning
.loopa:
mov eax, 0
cmp [ebx+esi], eax
je .skip
mov eax, esi
push ebx
push ecx
mov ebx, 4
cdq
div ebx
push eax
push dword acoutput
call printf
add esp, 8
pop ecx
pop ebx
push ebx
push ecx ; make sure to put ecx (counter) on stack so we don't lose it when calling printf)
push dword [ebx + esi] ; put the value of the array at this (esi) index on the stack to be used by printf
push dword aoutput ; put the array output format on the stack for printf to use
call printf ; call the printf command
add esp, 8 ; add 4 bytes * 2
pop ecx ; get ecx back
pop ebx
push ebx
push ecx
push dword newline
call printf
add esp, 4
pop ecx
pop ebx
.skip:
add esi, 4
loop .loopa
.end:
mov esp, ebp ; takedown stack frame
pop ebp ; same as "leave" op

Changed code under .loopa label to make it print the character the index is corresponding to:
.loopa:
mov eax, 0
cmp [ebx+esi], eax
je .skip
mov eax, esi
push ebx
push ecx
mov ebx, 4
cdq
div ebx
push eax
push dword acoutput
call printf
add esp, 8
pop ecx
pop ebx

Related

Printing array as double word in assembly

%include "asm_io.inc"
;
; initialized data is put in the .data segment
;
segment .data
array: dd 180,32,455,499,388,480,239,346,257,84
fmt: dd ",%d",0
; uninitialized data is put in the .bss segment
;
segment .bss
resd 10
;
; code is put in the .text segment
;
segment .text
extern printf
global asm_main
asm_main:
enter 0,0 ; setup routine
pusha
; The following is just example of how to print an array
push dword 10
push dword array
call print_array
add esp,8 ; clean up stack
; don't delete anything following this comment
popa
mov eax, 0 ; return back to C
leave
ret
segment .data
ListFormat db ",%u", 0
segment .text
global print_array
print_array:
enter 0,0
push esi
push ebx
xor esi, esi ; esi = 0
mov ecx, [ebp+12] ; ecx = n
mov ebx, [ebp+8]
xor edx, edx
mov dl, [ebx + esi] ; ebx = address of array
mov eax,edx
call print_int
dec ecx
inc esi
print_loop:
xor edx,edx
mov dl,[ebx + esi]
push ecx ; printf might change ecx!
push edx ; push array value
push dword ListFormat
call printf
add esp, 8 ; remove parameters (leave ecx!)
inc esi
pop ecx
loop print_loop
call print_nl
pop ebx
pop esi
leave
ret
So this code prints out 180,0,0,0,32,0,0,0,199,1 when I want to print out 180,32,455,499,388,480,239,346,257,84. I think that it's because this is designed to print byte words. I'm trying to print in double words and I'm guessing something in the print_array needs to be changed. I tried mov dl, [ebx+esi*4] but it still doesn't print the array that I want to print. Or does something else needs to be changed to print array of double words?

You could leave it at changing the mov dl, [ebx+esi] instruction into mov edx, [ebx+esi*4], but that would be just half the fun!
1 Why not try to make a loop that can deal with the special case of the first value in the list that doesn't need the comma prefix? No more using print_int.
2 Also don't use the LOOP instruction. It's slow! The pair cmp jb (that can macro-fuse) is much better.
3 And replacing the prolog enter 0,0 and epilog leave codes by simply addressing the parameters via ESP relative addressing is simple enough.
4 Always consider the special cases! What if the array happens to be empty?
print_array:
push ebx
push esi
mov ebx, [esp+12] ; Begin array
mov esi, [esp+16] ; n
test esi, esi
jz done
lea esi, [ebx+esi*4] ; End array
mov edx, ListFormat+1 ; "%u"
more: mov eax, [ebx] ; Array dword value
push eax
push edx ; "%u" first time, ",%u" others
call printf
add esp, 8
add ebx, 4 ; To next dword in the array
mov edx, ListFormat ; ",%u"
cmp ebx, esi ; Current address < Last address ?
jb more ; Yes
call print_nl
done: pop esi
pop ebx
ret
Under the right conditions, keeping ESP fixed inside this loop can be worth doing. See Peter Cordes' comments below this answer.
Next is a version of this code that keeps ESP fixed inside the loop:
print_array:
push ebx
push esi
mov ebx, [esp+12] ; Begin array
mov esi, [esp+16] ; n
test esi, esi
jz done
sub esp, 8 ; Space for the printf args
lea esi, [ebx+esi*4] ; End array
mov edx, ListFormat+1 ; "%u"
more: mov eax, [ebx] ; Array dword value
mov [esp+4], eax
mov [esp], edx ; "%u" first time, ",%u" others
call printf
add ebx, 4 ; To next dword in the array
mov edx, ListFormat ; ",%u"
cmp ebx, esi ; Current address < Last address ?
jb more ; Yes
call print_nl
add esp, 8
done: pop esi
pop ebx
ret

NASM: Convert multi character input to decimal

I am trying to write a program that gets a number with one or two digits and write Hello! as many times as that number.
I used this posts to write my code:
NASM: The loop turns into an infinite loop
Check null character in assembly language
Multi-Digit Input from NASM I did not understand this :)
But my code only works for two digit numbers and the result for single digit numbers are wrong.
My code:
section .data
msg db 'Hello!',0xA
len equ $-msg
section .bss
n resb 2
section .text
global _start
_start:
;get n
mov edx, 2
mov ecx, n
mov ebx, 0
mov eax, 3
int 0x80
lea esi,[n] ;address of our text
call toint
;loop for print 'Hello!\n'
print_loop:
push ecx
mov edx, len
mov ecx, msg
mov ebx, 1
mov eax, 4
int 0x80
pop ecx
loop print_loop
mov eax, 1
int 0x80
toint:
push eax
push ebx
xor ebx, ebx
next_digit:
movzx eax, byte[esi]
sub al , '0'
imul ebx, 10
add ebx, eax
inc esi
cmp byte [esi] , 0x0 ;check next character is null or not
jne next_digit
; end_loop:
mov ecx, ebx
pop eax
pop ebx
ret

The sys_read call returns in EAX the count of characters that were sent to your inputbuffer. Because you allowed for an input of max. 2 characters, this count will be either 0, 1, or 2. You could use this info in your toint routine.
; IN (eax,esi) OUT (ecx)
toint:
mov ecx, eax ; Number of digits
jecxz done
push eax ; (1)
push ebx ; (2)
push esi ; (3)
xor ebx, ebx
next:
movzx eax, byte [esi]
sub al , '0'
imul ebx, 10
add ebx, eax
inc esi
dec ecx
jnz next
mov ecx, ebx
pop esi ; (3)
pop ebx ; (2)
pop eax ; (1)
done:
ret
Please notice that there's a reversed ordering to be observed when preserving/restoring registers on the stack! (Your code missed this...)
4 tips
a. Prefer the MOV variant to load an address. It's always a shorter instruction.
b. Guard yourself against an input of zero.
c. Don't use LOOP. It's a slow instruction.
d. Provide the exit code to terminate the program.
mov esi, n ; (a)
call toint ; -> ECX
jecxz Exit ; (b)
print_loop:
...
dec ecx ; (c)
jnz print_loop
Exit:
xor ebx, ebx ; (d)
mov eax, 1
int 0x80

Segmentation fault Core Dumped X86 Assembly Array

I asked for help earlier and thought I was home free but I'm not. My logic was wrong and I've greatly altered it. This program is supposed to return the Max int in the array (which also happens to be the last element). After tracing it with GDB a few times, I see that I get to the 5th (2nd to last) element in the array, "20", when I hit a segmentation fault and the program halts. I set ECX to the array length and subtracted 1 from ECX to avoid this, but my logic is obviously wrong. Am I wrong in depending on the ECX counter to terminate the loop. Any ideas please?
***EDIT MAJOR EDITS TO CODE
SECTION .data ;data section
msg1: db "Here are the array elements:", 10, 0
msg1Len: equ $-msg1
msg2: db "Here is the sorted array:", 10, 0
msg2Len: equ $-msg2
arr: dd 12, 16, 6, 18, 10, 40, 30
arrLen: equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global bsort
bsort:
push ebp ; save old ebp register
mov ebp, esp ; build a new stack
restart:
mov ebx, arr ; the base address argument is saved in ebx register
mov ecx, arrLen ; the size argument is saved in exc register
sub ecx, 1 ; Last member has no following element to compare with.
; So we need to reduce the counter by 1
top:
mov eax, [ebx] ;; access first array element. Move its value to eax
cmp eax, [ebx+4] ; compare the value of eax ([ebx]) with [ebx+4]
jle noswap ; if value at eax is less or equal to value of [ebx+4]
; no need to exchang values. Jump to noswap
xchg eax, [ebx+4] ; if value at eax > value [ebx+4], exchange
mov [ebx], eax ; store the new exchanged value at [ebx]
jmp restart ; reset the base address and counter. Start over
noswap:
add ebx, 4 ; move to the next array element
loop top ; loop back to the top if the register ecx > 0
leave
ret
global main
main:
push ebp
mov ebp, esp
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save sizein in ecx
mov ebx, arr
mov ecx, arrLen; store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
loop PrintArray
;call bubblesort
call bsort
mov ecx, msg2
mov edx, msg2Len
call PString
;save arr base add in sbx and size in ecx
mov ebx, arr
mov ecx, arrLen
PrintSortedArray:
mov eax, [ebx]
call PrintDec
call Println
add ebx, 4
loop PrintSortedArray
;exit program and clean stack
mov esp, ebp
pop ebp
ret
PString:; save register values of the called function
pusha
mov eax,4 ; use 'write' system call = 4
mov ebx,1 ; file descriptor 1 = STDOUT
int 80h ; call the kernel
; restore the old register values of the called function
popa
ret
Println:
;will call PString func
;will change content of ecx and edx
;need to save registers used by the main program
section .data
nl db 10
section .text
pusha
mov ecx, nl
mov edx, 1
call PString
;return original register values
popa
ret
PrintDec:
;saves all registers so they return unmodified
;build the function to handle dword size
section .bss
decstr resb 10 ; 10 32-bit digits
ct1 resd 1 ;keep track of dec-string size
section .text
pusha; save registers
mov dword[ct1],0 ;initially assume 0
mov edi, decstr ; edi points to dec-string
add edi, 9 ; moved to the last element of string
xor edx, edx ; clear edx for 64-bit div
whileNotZero:
mov ebx, 10 ; get ready to divide by 10
div ebx ; divide by 10
add edx, '0' ; convert to ascii
mov byte[edi], dl ; put it in string
dec edi ; move to next char in str
inc dword[ct1] ; inc char counter
xor edx, edx ; clear edx
cmp eax, 0 ;is remainder 0?
jne whileNotZero ;if no, keep on looping
inc edi ; conversion finished, bring edi
mov ecx, edi ; back to start of string. make ecx
mov edx, [ct1] ; point to counterm edx gets # chars
mov eax, 4 ; print to stdout
mov ebx, 1
int 0x80 ; call kernel
popa ; restore registers
ret

X86 Assembly Printing Max in Array returns Segmentation fault (core dumped)

My program works, but there is something wrong with my printMax function. The program terminates with a
Segmentation fault (core dumped).
I have tried building a stack for the function and just doing a pusha popa and both ways, I get the seg fault core dumped.
I've tried calling the function, but it just runs twice.
Any idea what I am doing wrong?
SECTION .data ;data section
msg1 : db "Here are the array elements:", 10, 0
msg1Len: equ $-msg1
msg2 : db "Here is the max value in the array:", 10, 0
msg2Len: equ $-msg2
arr : dd 2,4,6,8,10,20,40
arrLen : equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global main
main:
push ebp
mov ebp, esp
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save sizein in ecx
mov ebx, arr
mov ecx, arrLen; store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
loop PrintArray
printMax:
section .text
pusha
;reset array to find max
mov ebx, arr
mov ecx, arrLen
loopForMax:
mov eax, [ebx]
cmp eax, [ebx +4]
jle sameMax
mov [max], eax
sameMax:
add ebx, 4 ;move to next element
loop loopForMax
mov ecx, msg2
mov edx, msg2Len
call PString
mov eax, [max]
call PrintDec
call Println
popa
ret
;exit program and clean stack
mov esp, ebp
pop ebp
ret
PString:; save register values of the called function
pusha
mov eax,4 ; use 'write' system call = 4
mov ebx,1 ; file descriptor 1 = STDOUT
int 80h ; call the kernel
; restore the old register values of the called function
popa
ret
Println:
;will call PString func
;will change content of ecx and edx
;need to save registers used by the main program
section .data
nl db 10
section .text
pusha
mov ecx, nl
mov edx, 1
call PString
;return original register values
popa
ret
PrintDec:
;saves all registers so they return unmodified
;build the function to handle dword size
section .bss
decstr resb 10 ; 10 32-bit digits
ct1 resd 1 ;keep track of dec-string size
section .text
pusha; save registers
mov dword[ct1],0 ;initially assume 0
mov edi, decstr ; edi points to dec-string
add edi, 9 ; moved to the last element of string
xor edx, edx ; clear edx for 64-bit div
whileNotZero:
mov ebx, 10 ; get ready to divide by 10
div ebx ; divide by 10
add edx, '0' ; convert to ascii
mov byte[edi], dl ; put it in string
dec edi ; move to next char in str
inc dword[ct1] ; inc char counter
xor edx, edx ; clear edx
cmp eax, 0 ;is remainder 0?
jne whileNotZero ;if no, keep on looping
inc edi ; conversion finished, bring edi
mov ecx, edi ; back to start of string. make ecx
mov edx, [ct1] ; point to counterm edx gets # chars
mov eax, 4 ; print to stdout
mov ebx, 1
int 0x80 ; call kernel
popa ; restore registers
ret

Calling C function in Assembly Segfaults

I am trying to write an assembly program that calls a function in c that will replace certain characters in a string with a predefined character given that the currently character in the char array meets some qualification.
My c file:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
// I ran my code through a debugger again, and it seems that when displaying
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
And my .asm file:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov esi, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
xor bl, bl ;bl will be our counter
firstLoop:
add bl, 1 ;inc bl would work too
add ecx, 1
mov eax, [ecx]
cmp eax, 0
jz end
push eax ; parameter for (*isinsubset)
;BREAK
call edx ; execute (*isinsubset)
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
ret
When running this through gdb and putting a breakpoint at ;BREAK, it segfaults after I take a step to the call command with the following error:
Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6
isdigit is part of the standard c library that i have included in my c file, so I am not sure what to make of this.
Edit: I have edited my firstLoop and included a secondLoop which should replace any digits with "#", however it seems to replace the entire array.
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
;cmp eax, 0
;jne end
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
Using gdb, when the code gets to secondloop, everything is correct. ecx is showing as "1" which is the first digit in the string that was passed in from the .c file. Esi is displaying as "#" as it should be. However, after I do mov [ecx], esi it seems to fall apart. ecx is displaying as "#" as it should at this point, but once I increment by 1 to get to the next character in the array, it is listed as "/000" with display. Every character after the 1 is replaced with "#" is listed as "/000" with display. Before I had the secondLoop trying to replace the characters with "#", I just had firstLoop looping with it self to see if it could make it through the entire array without crashing. It did, and after each increment ecx was displaying as the correct character. I am not sure why doing mov [ecx], esi would have set the rest of ecx to null.

In your firstLoop: you're loading characters from the string using:
mov eax, [ecx]
which is loading 4 bytes at a tie instead of a single byte. So the int that you're passing to isdigit() is likely to by far out of range for it to handle (it probably uses a simple table lookup).
You can load a single byte using the following Intel asm syntax:
movzx eax, byte ptr [ecx]
A few other things:
it will also have the effect that it probably wouldn't detect the end of the string properly since the null terminator might not be followed by three other zero bytes.
I'm not sure why you increment ecx before processing the first character in the string
the assembly code you posted doesn't appear to actually loop over the string

I've put some comments into your code:-
; this is OK: setting up the stack frame and saving important register
; on Win32, the registers that need saving are: esi, edi and ebx
; the rest can be used without needing to preserve them
push ebp
mov ebp, esp
push esi
push ebx
xor eax, eax
mov ecx, [ebp + 8]
; you said that this checked [ecx] for zero, but I think you've just written
; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
; the value at the address defined by reg
; so this is effectively doing a null pointer check (which is good)
jecxz end
mov esi, [ebp + 12]
mov edx, [ebp + 16]
xor bl, bl
firstLoop:
add bl, 1
; you increment ecx before loading the first character, this means
; that the function ignores the first character of the string
; and will therefore produce an incorrect result if the string
; starts with a character that needs replacing
add ecx, 1
; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
mov eax, [ecx]
cmp eax, 0
jz end
push eax
; possibly segfaults due to character out of range
; also, as mentioned elsewhere, the function you call here must conform to the
; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
; Win32 systems), so eax, ecx and edx can change, so next time you call
; [edx] it might be referencing random memory
; either save edx on the stack (push before pushing parameters, pop after add esp)
; or just load edx with [ebp+16] here instead of at the start
call edx
add esp, 4
mov ebx, eax
; more functionality required here!
end:
; restore important values, etc
pop ebx
pop esi
mov esp, ebp
pop ebp
; the result of the function should be in eax, but that's not set up properly yet
ret
Comments on your inner loop:-
firstLoop:
xor eax, eax
; you're loading a 32 bit value and checking for zero,
; strings are terminated with a null character, an 8 bit value,
; not a 32 bit value, so you're reading past the end of the string
; so this is unlikely to correctly test the end of string
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
; you need to keep ebp! its value must be saved (at the end,
; you do a mov esp,ebp)
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
; again, your accessing the string using a 32 bit value, not an 8 bit value
; so you're replacing the matched character and the three next characters
; with the new value
; the upper 24 bits are probably zero so the loop will terminate on the
; next character
; also, the function seems to be returning a count of characters replaced,
; but you're not recording the fact that characters have been replaced
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
You do seem to be having trouble with the way the memory works, you are getting confused between 8 bit and 32 bit memory access.