I'm trying to learn how to work with SSE, so I decided to write a simple routine that computes n^d; the routine is called from a C program.
Here's my NASM code:
; ------------------------------------------------------------
; printf format used to report "<base>^<exponent> = <result>" (all three as %d)
; ------------------------------------------------------------
section .data
resmsg: db '%d^%d = %d', 0
section .bss
section .text
extern printf
; ------------------------------------------------------------
; Function called from a c program, I only use n and d parameters but I left the others
; ------------------------------------------------------------
global main
; Offsets of the incoming stack arguments relative to ebp
; ([ebp+0] = saved ebp, [ebp+4] = return address)
T equ 8
n equ 12
d equ 16
m equ 20
Sid equ 24
Sn equ 28
main:
; ------------------------------------------------------------
; Function enter sequence
; ------------------------------------------------------------
push ebp ; save Base Pointer
mov ebp, esp ; Move Base Point to current frame
sub esp, 8 ; reserve space for two local vars: [ebp-4] and [ebp-8]
push ebx ; saved at [ebp-12] (ebx/esi/edi are callee-saved under i386 cdecl)
push esi ; saved at [ebp-16]
push edi ; saved at [ebp-20]
; ------------------------------------------------------------
; copy function's parameters to registries from stack
; ------------------------------------------------------------
mov eax, [ebp+T] ; T
mov ebx, [ebp+n] ; n
mov ecx, [ebp+d] ; d
mov edx, [ebp+m] ; m
mov esi, [ebp+Sid] ; Sid
mov edi, [ebp+Sn] ; Sn
mov [ebp-8], ecx ; keep the original d in a local; ecx becomes the loop counter
;
; pow is computed by doing n*n d times
; NOTE(review): the loop below runs d times starting from xmm0 = n, so it
; yields n^(d+1), not n^d.
;
; NOTE(review): movss copies the 32 raw bits at [ebp+n] without conversion;
; if n is passed as an int (it is printed with %d below), cvtsi2ss would be
; needed to obtain its value as a float.
movss xmm0, [ebp+n] ; base
movss xmm1, [ebp+n] ; another copy of the base because xmm0 will be overwritten by the result
loop: mulss xmm0, xmm1 ; scalar mult from sse
dec ecx ; counter--
cmp ecx,0 ; check if counter is 0 to end loop
jnz loop ; NOTE(review): never terminates promptly if d was 0 on entry
;
; let's store the result in eax by moving it to the stack and then copying to the registry (we use the other local var as support)
; NOTE(review): eax receives the float's bit pattern, not an integer value;
; the format string prints it with %d.
;
movss [ebp-4], xmm0
mov eax, [ebp-4]
;
; Print using C's printf
;
push eax ; result
mov ecx, [ebp-8] ; copy the original d back since we used it as loop's counter
push ecx ; exponent
push ebx ; base
push resmsg ; string format
call printf ; printf call
; NOTE(review): only 16 bytes were pushed for printf. Adding 24 moves esp
; past the saved edi and esi slots, so the three pops below read
; [ebp-12], [ebp-8] and [ebp-4] instead of the saved edi/esi/ebx.
add esp, 24 ; clean the stack from both our local and printf's vars
; ------------------------------------------------------------
; Function exit sequence
; ------------------------------------------------------------
pop edi ; restore the registries
pop esi
pop ebx
mov esp, ebp ; restore the Stack Pointer
pop ebp ; restore the Base Pointer
ret ; get back to C program
Now, what I'd expect is it to print
4^2 = 16
but, instead, I got
4^2 = 0
I've spent my whole afternoon on this and I couldn't find a solution, do you have any hints?
EDIT:
Since it seems a format problem, I tried converting the data using
movss [ebp-4], xmm0 ; spill the single-precision result to the local slot
fld dword [ebp-4] ; pushes the value onto the x87 stack; memory and eax are not changed by this
mov eax, dword [ebp-4] ; still loads the float's raw bit pattern, not a converted integer
instead of
movss [ebp-4], xmm0 ; spill the single-precision result to the local slot
mov eax, [ebp-4] ; eax = raw bit pattern of that float
but I got the same result.
MOVSS moves single-precision floats (32-bit). I assume that n is an integer, so you can't load it into an XMM register with MOVSS; use CVTSI2SS instead, which converts the integer to a float. printf cannot process single-precision floats either: a C compiler would convert them to doubles before the call. It's convenient to use CVTSS2SI at this point to convert the result back to an integer. So the code should look like:
...
;
; pow: start from 1.0 and multiply by n exactly d times, so that
; d = 0 gives 1 and d = 1 gives n. A non-positive d skips the loop.
; Expects ecx = d on entry (loaded by the elided code above - TODO confirm).
; The result is computed in single precision, so large powers lose accuracy.
;
cvtsi2ss xmm1, dword [ebp+n] ; xmm1 = (float)n: converts the integer, unlike movss
mov eax, 1
cvtsi2ss xmm0, eax ; xmm0 = 1.0, the running product (n^0)
test ecx, ecx
jle .pow_done ; d <= 0: nothing to multiply, result stays 1
.pow_loop: ; not named "loop": that is also an instruction mnemonic
mulss xmm0, xmm1 ; product *= n
sub ecx, 1
jnz .pow_loop ; ecx was >= 1 on entry, so this ends after d passes
.pow_done:
cvtss2si eax, xmm0 ; result as integer
;
; Print using C's printf
;
push eax ; result
mov ecx, [ebp-8] ; copy the original d back since we used it as loop's counter
push ecx ; exponent
push ebx ; base
push resmsg ; string format
call printf ; printf call
add esp, 16 ; clean the stack only from printf's vars (4 dwords)
...
Related
%include "asm_io.inc"
;
; initialized data is put in the .data segment
;
segment .data
array: dd 180,32,455,499,388,480,239,346,257,84
fmt: dd ",%d",0
;
; zero-initialised storage lives in the .bss segment
;
segment .bss
resd 10
;
; executable code lives in the .text segment
;
segment .text
extern printf
global asm_main
; ------------------------------------------------------------
; asm_main - entry point invoked from the C driver.
; Prints the 10 dwords of `array` through print_array and
; returns 0 in eax. All general registers are saved/restored.
; ------------------------------------------------------------
asm_main:
push ebp ; open an empty stack frame
mov ebp, esp
pushad ; keep every general register intact for the caller
; print_array(array, 10): arguments pushed right to left
push dword 10
push dword array
call print_array
add esp, 8 ; drop the two arguments
; don't delete anything following this comment
popad
xor eax, eax ; return value 0 for the C driver
mov esp, ebp ; close the frame
pop ebp
ret
segment .data
ListFormat db ",%u", 0 ; printf format for every element after the first
segment .text
global print_array
; ------------------------------------------------------------
; print_array - prints a comma-separated list followed by a newline.
; Stack arguments: [ebp+8] = array address, [ebp+12] = element count n.
; The first element goes through print_int, the rest through printf
; with ListFormat. print_int / print_nl are not defined here
; (presumably provided by asm_io.inc - confirm).
; NOTE(review): elements are fetched one BYTE at a time at [ebx + esi],
; so a dword array is walked byte by byte.
; ------------------------------------------------------------
print_array:
enter 0,0
push esi ; callee-saved registers used below
push ebx
xor esi, esi ; esi = 0 (byte index into the array)
mov ecx, [ebp+12] ; ecx = n
mov ebx, [ebp+8] ; ebx = address of array
xor edx, edx
mov dl, [ebx + esi] ; zero-extended first byte of the array
mov eax,edx
call print_int ; assumes print_int preserves ecx - TODO confirm
dec ecx ; one element already printed
inc esi
; NOTE(review): if n was 1, ecx is 0 here; the body still runs and the
; `loop` below then decrements ecx past zero.
print_loop:
xor edx,edx
mov dl,[ebx + esi] ; next byte, zero-extended into edx
push ecx ; printf might change ecx!
push edx ; push array value
push dword ListFormat
call printf
add esp, 8 ; remove parameters (leave ecx!)
inc esi ; advance by one byte
pop ecx
loop print_loop ; ecx--, repeat while ecx != 0
call print_nl
pop ebx
pop esi
leave
ret
So this code prints out 180,0,0,0,32,0,0,0,199,1 when I want it to print 180,32,455,499,388,480,239,346,257,84. I think that's because it is designed to print bytes. I'm trying to print double words, so I'm guessing something in print_array needs to be changed. I tried mov dl, [ebx+esi*4], but it still doesn't print the array that I want. Or does something else need to be changed to print an array of double words?
You could leave it at changing the mov dl, [ebx+esi] instruction into mov edx, [ebx+esi*4], but that would be just half the fun!
1 Why not try to make a loop that can deal with the special case of the first value in the list that doesn't need the comma prefix? No more using print_int.
2 Also don't use the LOOP instruction. It's slow! The pair cmp jb (that can macro-fuse) is much better.
3 And replacing the prolog enter 0,0 and epilog leave codes by simply addressing the parameters via ESP relative addressing is simple enough.
4 Always consider the special cases! What if the array happens to be empty?
; ------------------------------------------------------------
; print_array(array, n) - cdecl
; [esp+4] = address of a dword array, [esp+8] = element count.
; Prints the elements as unsigned decimals separated by commas,
; then a newline. An empty array prints nothing at all.
; ebx walks the array, esi holds the one-past-the-end address.
; ------------------------------------------------------------
print_array:
push ebx
push esi
mov esi, [esp+16] ; element count (offsets include the two pushes)
mov ebx, [esp+12] ; cursor = first element
test esi, esi
jz .exit ; nothing to print
lea esi, [ebx+esi*4] ; esi = address just past the last element
lea edx, [ListFormat+1] ; skip the comma for the first element: "%u"
.next:
push dword [ebx] ; value to print
push edx ; format chosen for this element
call printf
add esp, 8
add ebx, 4 ; step to the following dword
mov edx, ListFormat ; every later element uses ",%u"
cmp ebx, esi
jb .next ; more elements remain
call print_nl
.exit:
pop esi
pop ebx
ret
Under the right conditions, keeping ESP fixed inside this loop can be worth doing. See Peter Cordes' comments below this answer.
Next is a version of this code that keeps ESP fixed inside the loop:
; ------------------------------------------------------------
; print_array(array, n) - cdecl, ESP kept constant inside the loop
; [esp+4] = address of a dword array, [esp+8] = element count.
; Prints the elements as unsigned decimals separated by commas,
; then a newline. An empty array prints nothing at all.
; The two printf argument slots are reserved once and rewritten
; on every pass instead of being pushed and popped.
; ------------------------------------------------------------
print_array:
push ebx
push esi
mov esi, [esp+16] ; element count (offsets include the two pushes)
mov ebx, [esp+12] ; cursor = first element
test esi, esi
jz .exit ; nothing to print; no argument area was reserved
sub esp, 8 ; [esp] = format, [esp+4] = value
lea esi, [ebx+esi*4] ; esi = address just past the last element
lea edx, [ListFormat+1] ; skip the comma for the first element: "%u"
.next:
mov [esp], edx ; format chosen for this element
mov eax, [ebx]
mov [esp+4], eax ; value to print
call printf
add ebx, 4 ; step to the following dword
mov edx, ListFormat ; every later element uses ",%u"
cmp ebx, esi
jb .next ; more elements remain
call print_nl
add esp, 8 ; release the argument area
.exit:
pop esi
pop ebx
ret
This is probably my final hurdle in learning x86 assembly language.
The following subroutine is giving me a segmentation fault:
;=================================================================
; RemCharCodeFromAToB - removes all chars between a and e from str
; arguments:
; str - string to be processed ([ebp + 8], address of a NUL-terminated string)
; a - start ([ebp + 12], only the low byte is read)
; e - end ([ebp + 16], only the low byte is read)
; return value:
; n/a
; calls:
; IsBetweenAandB (defined elsewhere; its result is tested in eax)
;-------------------------------------------------------------------
RemCharCodeFromAToB:
; standard entry sequence
push ebp ; save the caller's frame pointer
mov ebp, esp ; ebp becomes the base for addressing the arguments
; accessing arguments
; [ebp + 0] = old ebp stack frame
; [ebp + 4] = return address
mov edx, [ebp + 8] ; string address
; NOTE(review): edx is kept live across the call below; this assumes
; IsBetweenAandB does not modify edx - TODO confirm.
while_loop_rcc:
mov cl, [edx] ; load the character at the current position
cmp cl, 0 ; check the null value
je while_loop_exit_rcc ; exit if the null-character is reached
mov al, cl ; save cl (only al is written; the rest of eax is whatever it held)
mov cl, [ebp + 16] ; end-char (only cl is written; the rest of ecx is unchanged)
push cx ; push end-char (16-bit push: 2 bytes)
mov cl, [ebp + 12] ; start-char
push cx ; push start-char (16-bit push: 2 bytes)
push ax; push the current character (16-bit push: 2 bytes)
call IsBetweenAandB
; NOTE(review): the three pushes above total 6 bytes, but 12 are removed
; here, so esp ends up 6 bytes higher after every iteration.
add esp, 12
cmp eax, 0 ; if(ch is not between 'a' and 'e')
je inner_loop_exit_rcc
mov eax, edx ; copy the current address
; Shift the rest of the string one position to the left.
; NOTE(review): the loop exits when the NEXT byte is NUL without storing
; it, so the terminator itself is never moved.
inner_loop_rcc:
mov cl, [eax+1]
cmp cl, 0
je inner_loop_exit_rcc
mov [eax], cl
inc eax
jmp inner_loop_rcc
inner_loop_exit_rcc:
inc edx ; increment the address (also taken after a removal)
jmp while_loop_rcc ; start the loop again
while_loop_exit_rcc:
; standard exit sequence
mov esp, ebp ; restore esp with ebp
pop ebp ; restore the caller's ebp
ret ; return to the caller (no return value is set)
;===================================================================
I am suspecting that there is something wrong with data conversions from 32-bit to 8-bit registers and vice-versa. My concept regarding this is not clear yet.
Or, is there something wrong in the following part
mov al, cl ; save cl (writes only the low byte of eax)
mov cl, [ebp + 16] ; end-char
push cx ; push end-char (16-bit register: 2 bytes)
mov cl, [ebp + 12] ; start-char
push cx ; push start-char (2 bytes)
push ax; push the current character (2 bytes)
call IsBetweenAandB
add esp, 12 ; NOTE(review): 12 bytes removed although the pushes above total 6
?
Full asm code is here.
C++ code is here.
Makefile is here.
cx and ax are 16-bit registers, so your push cx ; push cx; push ax are pushing 16-bit values on the stack, a total of 6 bytes. But IsBetweenAandB is apparently expecting 32-bit values, and you add 12 to esp at the end (instead of 6). So you probably wanted push ecx etc.
Also, you probably want to zero out eax and ecx before using them. As it stands, they probably contain garbage initially, and you only load useful data into the low 8 bits al and cl. Thus when IsBetweenAandB tries to compare the full 32-bit values, you are going to get false results. Or else you want to rewrite IsBetweenAandB to only compare the low bytes that you care about.
I asked for help earlier and thought I was home free, but I'm not. My logic was wrong and I've greatly altered it. This program is supposed to return the max int in the array (which also happens to be the last element). After tracing it with GDB a few times, I see that I get to the 5th (2nd to last) element in the array, "20", when I hit a segmentation fault and the program halts. I set ECX to the array length and subtracted 1 from ECX to avoid this, but my logic is obviously wrong. Am I wrong in depending on the ECX counter to terminate the loop? Any ideas, please?
***EDIT MAJOR EDITS TO CODE
SECTION .data ;data section
; Messages are written with the write system call, which takes an explicit
; byte count. The count is therefore taken BEFORE the terminating NUL so
; that no NUL byte is sent to stdout; the strings stay NUL-terminated.
msg1: db "Here are the array elements:", 10
msg1Len: equ $-msg1 ;bytes to write (NUL excluded)
db 0
msg2: db "Here is the sorted array:", 10
msg2Len: equ $-msg2 ;bytes to write (NUL excluded)
db 0
arr: dd 12, 16, 6, 18, 10, 40, 30
arrLen: equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global bsort
;------------------------------------------------------------------
; bsort - sorts the dword array `arr` (arrLen elements) in place into
; ascending order, using signed comparisons.
; Takes no arguments and returns nothing.
; Method: walk adjacent pairs; on the first out-of-order pair swap it
; and start again from the beginning. Finishes after one full pass
; without a swap.
; Clobbers: eax, ecx, edx, flags. ebx is saved and restored because
; it is callee-saved under the i386 cdecl convention.
;------------------------------------------------------------------
bsort:
push ebp ; save old ebp register
mov ebp, esp ; build a new stack frame
push ebx ; callee-saved, used as the array cursor
.restart:
mov ebx, arr ; cursor = first element
mov ecx, arrLen - 1 ; number of adjacent pairs to examine
test ecx, ecx
jle .sorted ; 0 or 1 element: nothing to compare
.compare:
mov eax, [ebx] ; current element
mov edx, [ebx+4] ; its right-hand neighbour
cmp eax, edx
jle .in_order ; already ascending
mov [ebx], edx ; swap the pair ...
mov [ebx+4], eax
jmp .restart ; ... and rescan from the start
.in_order:
add ebx, 4 ; move to the next array element
dec ecx
jnz .compare ; more pairs remain in this pass
.sorted:
pop ebx
leave
ret
global main
;------------------------------------------------------------------
; main - prints the array, sorts it with bsort, prints it again.
; Returns 0 in eax. ebx is saved and restored: it is callee-saved
; under i386 cdecl, and the C runtime that called main relies on it.
; PrintDec and Println preserve all registers (pusha/popa), so
; ebx and ecx stay valid across the calls inside the loops.
;------------------------------------------------------------------
main:
push ebp
mov ebp, esp
push ebx ;callee-saved; used as array cursor below
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save size in ecx
mov ebx, arr
mov ecx, arrLen ;store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
dec ecx
jnz PrintArray
;call bubblesort
call bsort
mov ecx, msg2
mov edx, msg2Len
call PString
;reload arr base address in ebx and size in ecx
mov ebx, arr
mov ecx, arrLen
PrintSortedArray:
mov eax, [ebx]
call PrintDec
call Println
add ebx, 4
dec ecx
jnz PrintSortedArray
;exit program and clean stack
xor eax, eax ;exit status 0
pop ebx
mov esp, ebp
pop ebp
ret
;------------------------------------------------------------------
; PString - writes a buffer to standard output.
; In: ecx = address of the bytes, edx = number of bytes.
; All general registers are preserved; the system call's result
; is discarded.
;------------------------------------------------------------------
PString:
pushad ; keep the caller's registers
mov ebx, 1 ; file descriptor 1 = STDOUT
mov eax, 4 ; 'write' system call = 4
int 0x80 ; call the kernel
popad ; give the caller its registers back
ret
section .data
nl db 10
section .text
;------------------------------------------------------------------
; Println - writes a single newline character via PString.
; No inputs; all general registers are preserved.
;------------------------------------------------------------------
Println:
pushad ; ecx/edx are overwritten below, so save everything
mov edx, 1 ; one byte ...
mov ecx, nl ; ... located at nl
call PString
popad
ret
section .bss
decstr resb 10 ; room for the 10 digits a 32-bit value can need
ct1 resd 1 ; number of digits produced
section .text
;------------------------------------------------------------------
; PrintDec - writes eax to standard output as an unsigned decimal.
; In: eax = value. All general registers are preserved.
; Digits are generated least-significant first and stored backwards
; from the end of decstr, so the finished text is already in order.
;------------------------------------------------------------------
PrintDec:
pushad
mov dword [ct1], 0 ; no digits yet
lea edi, [decstr+9] ; edi = last byte of the buffer
mov ebx, 10 ; divisor, constant for the whole loop
.next_digit:
xor edx, edx ; edx:eax is the 64-bit dividend
div ebx ; eax = quotient, edx = remainder (0..9)
add dl, '0' ; remainder -> ASCII digit
mov [edi], dl
dec edi ; next digit goes one byte earlier
inc dword [ct1]
test eax, eax ; anything left to convert?
jnz .next_digit
lea ecx, [edi+1] ; ecx = first digit written
mov edx, [ct1] ; edx = digit count
mov ebx, 1 ; STDOUT
mov eax, 4 ; write
int 0x80 ; call kernel
popad
ret
My program works, but there is something wrong with my printMax function. The program terminates with a
Segmentation fault (core dumped).
I have tried building a stack for the function and just doing a pusha popa and both ways, I get the seg fault core dumped.
I've tried calling the function, but it just runs twice.
Any idea what I am doing wrong?
SECTION .data ;data section
; Messages are written with the write system call, which takes an explicit
; byte count. The count is therefore taken BEFORE the terminating NUL so
; that no NUL byte is sent to stdout; the strings stay NUL-terminated.
msg1: db "Here are the array elements:", 10
msg1Len: equ $-msg1 ;bytes to write (NUL excluded)
db 0
msg2: db "Here is the max value in the array:", 10
msg2Len: equ $-msg2 ;bytes to write (NUL excluded)
db 0
arr: dd 2,4,6,8,10,20,40
arrLen: equ ($-arr)/4 ;number of elements = array length / 4
SECTION .bss
max resd 1 ;declare and reserve space for max
SECTION .text
global main
;------------------------------------------------------------------
; main - prints every element of arr, then calls printMax.
; Returns 0 in eax. ebx is saved and restored (callee-saved under
; i386 cdecl). PrintDec and Println preserve all registers, so ebx
; and ecx stay valid across the calls inside the loop.
;
; printMax is now CALLED rather than fallen into: previously its
; `ret` executed with main's saved ebp on top of the stack and
; jumped to that value.
;------------------------------------------------------------------
main:
push ebp
mov ebp, esp
push ebx ;callee-saved; used as array cursor below
mov ecx, msg1 ;print msg1
mov edx, msg1Len
call PString
;save array base address in ebx and save size in ecx
mov ebx, arr
mov ecx, arrLen ;store num elements in ecx
;loop to print array
PrintArray:
mov eax, [ebx] ;move value [ebx] to eax
call PrintDec
call Println
add ebx, 4
dec ecx
jnz PrintArray
call printMax
;exit program and clean stack
xor eax, eax ;exit status 0
pop ebx
mov esp, ebp
pop ebp
ret
;------------------------------------------------------------------
; printMax - finds the largest element of arr (signed comparison),
; stores it in [max], and prints msg2 followed by the value.
; No arguments; all general registers are preserved.
; Assumes arrLen >= 1 (arr is a non-empty assemble-time table).
; The scan keeps a running maximum and only reads elements
; 0 .. arrLen-1; it never looks past the end of the array.
;------------------------------------------------------------------
printMax:
pusha
mov ebx, arr
mov ecx, arrLen
mov eax, [ebx] ;running max starts as the first element
jmp .advance
.scan:
cmp eax, [ebx]
jge .advance ;current element is not larger
mov eax, [ebx] ;new running max
.advance:
add ebx, 4 ;move to next element
dec ecx
jnz .scan ;elements remain
mov [max], eax
mov ecx, msg2
mov edx, msg2Len
call PString
mov eax, [max]
call PrintDec
call Println
popa
ret
;------------------------------------------------------------------
; PString - writes a buffer to standard output.
; In: ecx = address of the bytes, edx = number of bytes.
; All general registers are preserved; the system call's result
; is discarded.
;------------------------------------------------------------------
PString:
pushad ; keep the caller's registers
mov ebx, 1 ; file descriptor 1 = STDOUT
mov eax, 4 ; 'write' system call = 4
int 0x80 ; call the kernel
popad ; give the caller its registers back
ret
section .data
nl db 10
section .text
;------------------------------------------------------------------
; Println - writes a single newline character via PString.
; No inputs; all general registers are preserved.
;------------------------------------------------------------------
Println:
pushad ; ecx/edx are overwritten below, so save everything
mov edx, 1 ; one byte ...
mov ecx, nl ; ... located at nl
call PString
popad
ret
section .bss
decstr resb 10 ; room for the 10 digits a 32-bit value can need
ct1 resd 1 ; number of digits produced
section .text
;------------------------------------------------------------------
; PrintDec - writes eax to standard output as an unsigned decimal.
; In: eax = value. All general registers are preserved.
; Digits are generated least-significant first and stored backwards
; from the end of decstr, so the finished text is already in order.
;------------------------------------------------------------------
PrintDec:
pushad
mov dword [ct1], 0 ; no digits yet
lea edi, [decstr+9] ; edi = last byte of the buffer
mov ebx, 10 ; divisor, constant for the whole loop
.next_digit:
xor edx, edx ; edx:eax is the 64-bit dividend
div ebx ; eax = quotient, edx = remainder (0..9)
add dl, '0' ; remainder -> ASCII digit
mov [edi], dl
dec edi ; next digit goes one byte earlier
inc dword [ct1]
test eax, eax ; anything left to convert?
jnz .next_digit
lea ecx, [edi+1] ; ecx = first digit written
mov edx, [ct1] ; edx = digit count
mov ebx, 1 ; STDOUT
mov eax, 4 ; write
int 0x80 ; call kernel
popad
ret
Basically, the program is supposed to read floating-point numbers from the user and then compute their sum and their average. Only values from 0.0 to 100.0 are counted; anything above that range is ignored. A negative number is not counted either, and it also ends the input loop. The program outputs the sum, the number of valid numbers entered, and finally the average of the numbers ROUNDED to the nearest integer. I think I am close right now; my program so far is below. Please, I could use some help finding the bug (or error in reasoning, if there is one) in my code below. Note that I am using NASM.
Update: When it runs and input just the simple 1,2,3,4 it should output 3(rounded to nearest integer) is the average, but it outputs 100, which means it is running all the way through that loop or something similar is going wrong.
Update: I found the problem(s) there should have been a couple more comparisons and should have had more ffree st0 to free up st0, that kept creating a problem. Anyways got it working.
; Declare needed external functions
;
extern printf ; the C function, to be called
extern scanf ; the C function, to be called
SECTION .data ; Data section, initialized variables
three: dq 3.0 ; referenced only from a commented-out line in the code shown
erroro: db "Did not work as intended...", 10, 0 ; not referenced in the code shown
zero: dq 0.0 ; seed for avga[0]
half: dq 0.5 ; step between consecutive avga entries
max_val: dq 100.0 ; max value is 100
min_val: dq 0.0 ; min value is 0
input: db "%lf", 0 ; input format (scanf reads a double)
intro: db "Program title is Average and programmer is Joe", 10, 0 ; introduction format
iname: db "Enter your full name", 10, 0 ; get name instructions from user format
gname: db "%s %s", 0 ; get first and last name (no field width: input length is unbounded)
inst: db "Enter a number between 0.0 and 100.0, negative number to exit", 10, 0 ; instruction format
countout: db "The number of valid numbers entered: %d", 10, 0 ; counter output format
sumoutput: db "The sum is: %f", 10, 0 ; sum output format
avgoutput: db "The average is: %d", 10, 0 ; average output format
specialbye: db "You didn't enter any numbers, something wrong %s %s?", 10, 0 ; special goodbye format (if user did not input any valid numbers)
bye: db "Thanks for using this program, %s %s", 10, 0 ; goodbye output format
SECTION .bss ; BSS, uninitialized variables
x: resq 1 ; number entered by user (double)
sum: resq 1 ; running sum (double)
avgc: resq 1 ; average used to compare (not what is outputted)
avg: resd 101 ; integer lookup table, filled by main so that avg[i] = i (this is how an integer is output)
count: resd 1 ; number of accepted inputs
avga: resq 201 ; doubles filled by main with 0.0, 0.5, ... 100.0 (steps of .5), used for comparing avg
fn: resw 10 ; first name buffer: 10 words = 20 bytes
ln: resw 10 ; last name buffer: 10 words = 20 bytes
SECTION .text ; Code section.
global main ; the standard gcc entry point
; ------------------------------------------------------------------
; main - reads doubles with scanf until one is below min_val, sums the
; ones that are not above max_val, then prints the count, the sum and
; an integer derived from the average via the avga/avg lookup tables.
; Returns 0 in eax.
; NOTE(review): ebx, esi and edi are modified below and never restored
; before returning.
; NOTE(review): several fcomip sequences leave a value on the x87
; register stack; see the notes at each site.
; ------------------------------------------------------------------
main: ; the program label for the entry point
push ebp ; set up stack frame
mov ebp,esp
; Fill avg so that avg[i] = i for i = 0..100 (eax = value, ebx = byte offset)
mov eax, 0
mov ebx, 0
mov ecx, 101
.setavg: ;average array set up (used to output)
mov [avg+ebx], eax
push eax ; eax is borrowed to compute ebx + 4
mov eax, ebx
add eax, 4
mov ebx, eax ; ebx = next dword offset
pop eax
add eax, 1 ; next value
loop .setavg
finit ; reset the x87 unit (empties its register stack)
; get the first one taken care of...
fld qword [zero]
fstp qword [avga+0] ; avga[0] = 0.0
mov ebx, 0
mov ecx, 200
.setavga: ; average array set up to compare average values and then round to nearest integer (sort of..., actually it uses the average array to set which integer to "round to")
fld qword [avga+ebx] ; previous entry
fld qword [half]
fadd ; previous entry + 0.5
mov eax, ebx
add eax, 8
mov ebx, eax ; ebx = offset of the next qword
fstp qword [avga+ebx] ; store as the next entry
loop .setavga
jmp .start ; skip to .start label, used for testing purposes to reduce user input during testing
; --- everything from here to .start is skipped by the jmp above ---
; output introduction
push dword intro
call printf
add esp, 4
; output asking for name
push dword iname
call printf
add esp, 4
; get first and last name
push dword ln
push dword fn
push dword gname
call scanf
add esp, 12
; start loop
.start:
; output number input instructions to user
push dword inst
call printf
add esp, 4
;get number
push dword x
push dword input
call scanf ; scanf's return value is not checked
add esp, 8
;compare x and minimum value
;mov eax, [x] ; put x in eax
;cmp eax, min_val ; compare x and minimum value
;jl .post_while ; if x is less than minimum value (which means it is a negative), jump to post while
; compare value to minimum value, if minimum is greater than user input, jump to post_while
fld qword [x]
fld qword [min_val]
fcomip st0, st1 ; compares min_val (st0) with x (st1), then pops only st0
ja .post_while ; min_val > x. NOTE(review): x is still on the x87 stack on this path
; free up st0
ffree st0 ; tags st0 (x) as empty; ffree does not pop
;compare x and max value
;mov eax, [x] ; put x in eax
;cmp eax, max_val ; compare x and max value
;jg .start ; if x is greater than max value, jump up to start (loop to start)
; compare against max value, if max is less than input jump back to .start label without counting it
fld qword [x]
fld qword [max_val]
fcomip st0, st1 ; compares max_val (st0) with x (st1), then pops only st0
jb .start ; max_val < x. NOTE(review): x is still on the x87 stack on this path
;free up st0
ffree st0 ; tags st0 (x) as empty; ffree does not pop
; else calculate sum
;mov eax, [sum]
;add eax, [x]
;mov [sum], eax
; calculate sum
fld qword [sum]
fld qword [x]
fadd ; sum + x
fstp qword [sum]
; update counter
mov eax, [count]
add eax, 1
mov [count], eax
jmp .start ; loop back to start
; after loop
.post_while:
; special check: if count = 0 that means no valid numbers have been received by user, then jump down to .special label
; NOTE(review): this check is commented out, so .special is never reached
; and the division below also runs when count is 0.
;mov eax, [count]
;mov ebx, 0
;cmp eax, ebx
;je .special
; calculate the average (sum/count)
;mov eax, [sum]
;mov ebx, [count]
;cdq
;div ebx
;mov [avg], eax
; calculate average
fld qword [sum]
fild dword [count] ; integer count converted to floating point
fdiv ; intended as sum / count
fstp qword [avgc]
; calculate rounding (closer to below or above) i.e. 1.3 is closer to 1 so avg is 1
;mov eax, edx
;mov ebx, 2
;mul ebx
;cmp eax, [count]
;jnge .dontadd ; if not greater i.e. 1.3, then jump down to .dontadd label
; else add one to the average
;mov eax, [avg]
;add eax, 1
;mov [avg], eax
; setup counter and index counters
mov ecx, 201 ; one pass per avga entry
mov esi, 0 ; byte offset into avga
mov edi, 0 ; index into avg
.roundloop: ; loop that rounds the average to nearest integer
; caluclate if going to increase edi (which is used for avg array *integer array*), if ecx is divisible by 2, then increase it.
mov eax, ecx
mov ebx, 2
cdq
div ebx ; edx = ecx mod 2
mov ebx, 0
cmp edx, ebx
jne .dontinc
inc edi ; ecx is even on this pass
.dontinc:
; calculate if avga at index esi is above avgc (average calculate value), if so found where it is and end loop
; NOTE(review): fcomip pops only st0, so the avgc loaded here stays on the
; x87 stack on every pass; nothing in this loop removes it.
fld qword [avgc]
fld qword [avga+esi]
;fld qword [three]
fcomip st0, st1 ; compares avga entry (st0) with avgc (st1)
ja .endrloop ; avga entry > avgc: stop searching
; increment esi by 8
mov eax,esi
mov ebx, 8
add eax, ebx
mov esi, eax
loop .roundloop
.endrloop:
mov ebx, edi ; save edi index of avg (integer array)
; means it is not closer to next integer
.dontadd:
push ebx ; keep the index across the printf call
; output count
push dword [count]
push dword countout
call printf
add esp, 8
pop ebx
push ebx ; keep the index across the next printf call
; output sum
push dword [sum+4] ; high dword of the double
push dword [sum] ; low dword of the double
push dword sumoutput
call printf
add esp, 12
; output average
pop ebx
mov eax, ebx
mov ebx, 4
mul ebx ; eax = index * 4 (byte offset into avg); mul also writes edx
mov ebx, eax
push dword [avg+ebx] ; avg[index]
push dword avgoutput
call printf
add esp, 8
jmp .regular ; output should be normal, since we got to here.
; special case where count == 0, meaning no valid numbers have been inputted
.special: ; not used in testing purposes at the moment
; output special goodbye message
push dword ln
push dword fn
push dword specialbye
call printf
add esp, 12
jmp .small ; now small jump to skip rest of output
.regular:
; output regular bye message
; (fn and ln are only written by the name prompt that the jmp .start above skips)
push dword ln
push dword fn
push dword bye
call printf
add esp, 12
; small jump used only in special case
.small:
mov esp, ebp ; takedown stack frame
pop ebp ; same as "leave" op
mov eax,0 ; normal, no error, return value
ret ; return
Glad you found a solution!
Could you tell us why you did the following the hard way?
.setavg: ;average array set up (used to output)
mov [avg+ebx], eax ; avg[ebx / 4] = eax
push eax ; eax is borrowed as a temporary ...
mov eax, ebx
add eax, 4
mov ebx, eax ; ... only to add 4 to ebx
pop eax
add eax, 1 ; next value to store
loop .setavg
Usual version
.setavg: ;average array set up (used to output)
mov [avg+ebx], eax ; avg[ebx / 4] = eax
ADD EBX, 4 ; advance the byte offset directly, no temporary needed
add eax, 1 ; next value to store
loop .setavg ; ecx--, repeat while ecx != 0