ASSEMBLY: Binary Search on a Sorted String Array - c

After having spent several days on this binary search problem, which must be done completely in Assembly, I'm not quite sure where my logic breaks down when it comes to searching for name matches [case-insensitive] from a sorted array.
Any help would be greatly appreciated:
THE C PROGRAM
/*
int b_search (char list[100][20], int count, char* token);
list – the starting address of the list of names to be searched
count – total number of names in the list
token – name to be searched in the list
*/
This is the list of names:
Arturo Bryan chris David Jon Mark shane SIMON Thomas TONY
The following are all tokens, names to be searched in the list:
// tests elements with exact match, for example: "Jon", "shane", "TONY"
// tests case insensitivity, for example: "Chris", "BryAN"
// tests if code detects partial and nonsensical queries, for example: "Art" [short for Arturo], "garbage"
// tests a list with an odd number of names, for example: "DAVe", "Jona"
I've constantly been getting either infinite loops somewhere in the logic, finding
that the index returned is always 0 or finding that it incorrectly returns "name not found."
Again, any help would be greatly appreciated; thanks for reading.
// =================================================================================
MY CODE:
int b_search (char list[100][20], int count, char* token)
{
__asm
{
// Function returns the positionmove of the token in the list, starting with 1.
// If name is NOT found, return 0.
// Registers used:
// EAX: logical OR value
// EBX: toLowercase() loop counter
// ECX: total number of names
// EDI: name to be searched
// ESI: list pointer
mov eax, 0 ; // zero out the result
mov ebx, 0 ; // zero out EBX for use
mov esi, list ; // move the list pointer to ESI
mov edi, token ; // the name to be searched to EDI
// YOUR CODE HERE
// list - the starting address of the list of names to be searched
// count - total number of names in the list
// token - name to be searched in the list
// ==================================================================================================================
// CHANGE TOKEN LETTERS TO LOWERCASE
// ==================================================================================================================
TOKEN_LOWERCASE: // Cycles through every char in token and converts them to lowercase
mov eax, [edi + ebx] ; // move char of token into EAX
or eax, 0x20 ; // convert to lowercase by logical OR with 0010 0000
mov [edi + ebx], eax ; // move new char back into EAX
inc ebx ; // increments loop counter
cmp [edi + ebx], 0x00 ; // checks if the next char is a null terminator
jnz TOKEN_LOWERCASE ; // exit loop in the presence of a null terminator
// ==================================================================================================================
// BINARY SEARCH RECURSION - FIRST ITERATION LOCATION
// All registers are now open except for EDI and ESI
// ==================================================================================================================
mov eax, 0 ; // set the minimum value to be index first [0]
mov ecx, count ; // set the maximum value to be index last [index.length]
mov edx, 0 ; // zero out EDX for use
push eax ; // push minimum value EAX back onto stack
push ecx ; // push maximum value ECX back onto stack
BEGIN_BINARY_SEARCH: // return here for recursion
mov eax, 0 ; // zero out EAX for use
//mov ebx, 0 ; // zero out EBX for use
mov ecx, 0 ; // zero out ECX for use
mov edx, 0 ; // zero out EDX for use
// FIRST IN, LAST OUT
pop ecx ; // maximum value; first in, last out
pop eax ; // minimum value; first in, last out
cmp ecx, eax ; // compares the maximum and minimum values
jl DONE_EXIT ; // all operations completed, goto DONE_EXIT [KNOWN ISSUE]
mov edx, eax ; // move EAX into EDX
add edx, ecx ; // add EAX and ECX, store it into EDX
sar edx, 0x01 ; // shifts arithmetic right, dividing EDX by 2
// FIRST IN, LAST OUT
push eax ; // push minimum value EAX back onto stack
push ecx ; // push maximum value ECX back onto stack
mov eax, 0 ; // move EAX to 0 for use *****
mov ebx, 0 ; // move EBX to 0 for use [external counter, see "RECURSION CONCLUDES"]
mov ecx, 0 ; // move ECX to 0 for use
// ==============================================================================================================
// INNER RECURSIVE LOOP
// Registers to keep track of:
// ECX = token[i]
// EAX = element[i]
// ==============================================================================================================
GO_LOWER: // loop to check if cursor needs to go lower
mov ecx, edx ; // move EDX and copy it into ECX; SEE BELOW:
imul ecx, 0x14 ; // OFFSET_TOTAL = COUNT * 20[Decimal]
add ecx, ebx ; // adds offset to EBX
mov eax, [esi + ecx] ; // moves element[i] into EAX, where list + 20 * externalCount + internalCount
// ECX held the offset; it has been moved to EAX, so ECX can be reset
mov ecx, 0 ; // reset ECX with every iteration to prepare for another address's contents
mov ecx, [edi + ebx] ; // move token element into ECX
cmp eax, 0x00 ; // compares EAX to zero; checks for null terminator; SEE BELOW:
jz NULL_TERM_CHECK ; // if IS zero, then jump to IS_NULL
jnz NOT_NULL ; // if NOT zero, then jump to NOT_NULL
// ==========================================================================================================
NULL_TERM_CHECK: // procedure to check contents of ECX are a null terminator at this point
//cmp ecx, 0x00 ; // checks for null terminator
cmp ecx, eax ; // compares token and element
jz IS_MATCH ; // if IS null terminator, then reached end of String
jl DONE_GO_LOWER ; // if token.length() is shorter then element.length()
jg DONE_GO_HIGHER ; // if token.length() is longer than element.length()
//jnz DONE_EXIT ; // if NOT null terminator, function is not yet finished; proceed:
// ==========================================================================================================
NOT_NULL: // proceed with the rest of the function
or eax, 0x20 ; // logical OR with EAX will return the letter in lowercase
sub ecx, eax ; // -32 -> 0 -> +32; result indicates need to jump DONE_GO_LOWER or DONE_GO_HIGHER
jl DONE_GO_LOWER ; // jump to GO_LOWER if less than zero;
jg DONE_GO_HIGHER ; // jump to GO_HIGHER if greater than zero
inc ebx ; // increments loop counter if slips through
jmp GO_LOWER ; // return to GO_LOWER for recursion
// ==============================================================================================================
// ==================================================================================================================
// RECURSION CONCLUDES - END ITERATION LOCATION
// Registers EAX, EBX and ECX are now open
// Register EDX is reserved for being the external loop counter
// ==================================================================================================================
// ==================================================================================================================
DONE_GO_LOWER:
// FIRST IN, LAST OUT
pop ecx ; // pop maximum value back into ECX from stack
pop eax ; // pop minimum value back into EAX from stack
mov ecx, edx ; // move EDX into ECX, copying the value
sub ecx, 0x01 ; // subtracts 1 from current makes the maximum
push eax ; // push minimum value EAX back onto stack
push ecx ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH ; // jump back to beginning of recursion
// ==================================================================================================================
// ==================================================================================================================
DONE_GO_HIGHER:
// FIRST IN, LAST OUT
pop ecx ; // pop maximum value back into ECX from stack
pop eax ; // pop minimum value back into EAX from stack
mov eax, edx ; // move EDX into EAX, updating the minimum
add eax, 0x01 ; // adds 1 to current makes the minimum
push eax ; // push minimum value EAX back onto stack
push ecx ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH ; // jump back to beginning of recursion
// ==================================================================================================================
DONE_EXIT:
mov eax, 0 ; // move eax back to 0 to finish up
jmp DONE ; // jump to default done location
// ==================================================================================================================
IS_MATCH:
mov eax, edx ; // move ESP contents into EAX
jmp DONE ; // done with everything
// END PROCEDURE: DEFAULT TO HERE WHEN FINISHED
DONE: // ALL OPERATIONS FINISHED
}
}

#Edward is exactly right. Here is a C routine that's not hard to translate. My quick assembly version turned out to have 39 instructions.
#include <stdio.h>
int bsearch(char a[][20], int count, char *key)
{
// Answer lies in a[lo .. hi-1].
int lo = 0, hi = count;
while (lo < hi) {
// Midpoint of range where answer must lie.
int mid = (lo + hi) / 2;
// This simulates condition codes for key comparison.
int cmp;
// Pointers and character values from key and midpoint strings.
char *p_key = key, *p_mid = a[mid], ch_key, ch_mid;
// Pointers advance together through key and midpoint strings, stopping at '\0'.
for (;;) {
// Fetch characters from key and array.
ch_key = *p_key, ch_mid = *p_mid;
// Break if null is found;
if (ch_key == 0 || ch_mid == 0) break;
// Convert to lower case if necessary.
if ('A' <= ch_key && ch_key <= 'Z') ch_key += 'a' - 'A';
if ('A' <= ch_mid && ch_mid <= 'Z') ch_mid += 'a' - 'A';
// Break if inequality is found.
if (ch_key != ch_mid) break;
// Move to next character pair.
p_key++;
p_mid++;
}
// Set the condition codes based on character difference.
cmp = ch_key - ch_mid;
// If equal, we're done.
if (cmp == 0) return mid;
// Shrink the range based on comparison result.
if (cmp < 0) hi = mid;
else lo = mid + 1;
}
return -1;
}
int main(void) {
static char a[][20] = {
"Arturo", "Bryan", "chris", "David", "Jon", "Mark", "shane", "SIMON", "Thomas", "TONY"
};
static char keys[][20] = {
"ARTURo", "brYAn", "cHRiS", "dAvID", "jON", "MaRk", "sHAne", "sImON", "THOmas", "TonY" , "AAAA", "foo", "ZZZZZ"
};
#define COUNT(A) (sizeof A / sizeof A[0])
int i;
for (i = 0; i < COUNT(keys); i++) {
printf("%s: %d\n", keys[i], bsearch(a, COUNT(a), keys[i]));
}
return 0;
}

There are a number of problems with the code. In no particular order, here's what I found:
Altering the passed token
The code alters the token that was passed in, which may be acceptable, but you must check for a NUL terminator before you or the value with 0x20 and store it back in place.
Better would be to set the string to lowercase only when doing the comparison, leaving the passed string unaltered.
Comments are not helpful
Lines like this:
mov edx, eax ; // move EAX into EDX
are not helpful. A programmer can see that the contents of EAX is being moved into EDX. The comment should tell me why this is happening.
Problems keeping track of register contents
When you have mov ecx, 0 and two lines later pop ecx it tells me (and it should tell you!) that you're not keeping track of the contents of the register. Those are comments that will help you; I often write comment blocks above labeled lines (jump targets) that tell what I'm expecting to be in each register and on the stack. You have that in a few places, but not all of the relevant registers are listed. It really helps debugging code like this. Ideally, you'd assign some purpose to each register and then use them only for that purpose in the entire rest of the code.
Failure to lowercase the list entries
In the code after the GO_LOWER label, you're loading a letter of the next entry in the list into eax and the next letter of the token into ecx but only the latter has been converted to lowercase for comparison.
Convoluted branching
These lines are overly complex:
cmp eax, 0x00
jz NULL_TERM_CHECK
jnz NOT_NULL
NULL_TERM_CHECK:
This could be very much simplified:
cmp eax, 0x00
jnz NOT_NULL
Because the code will go to the next instruction anyway, you no longer need the other branch, and because it is used nowhere else, you can also eliminate the label.
Binary search confusion
The intent appears to be to do a binary search, which typically starts in the middle of an ordered list and then does a comparison to figure out whether the item is in the top half or the bottom half of the range. Your code appears to be starting from the first element rather than the middle and things can't work very well from there.
General advice:
Try writing a correct and working routine in C and then replace a small portion at a time with the corresponding assembly language routine. You'll spend less time debugging the basic algorithm and more time successfully implementing working assembly language code.

Related

x86 function returning char* in C

I want to write a function in x86 which will be called from C program.
The function should look like this:
char *remnth(char *s, int n);
I want it to remove every nth letter from string s and return that string. Here's my remnth.s file:
section.text
global remnth
remnth:
; prolog
push ebp
mov ebp, esp
; procedure
mov eax, [ebp+8]; Text in which I'm removing every nth letter
mov ebx, [ebp+12]; = n
mov ecx, [ebp+8] ; pointer to next letter (replacement)
lopext:
mov edi, [ebp+12] ; edi = n //setting counter
dec edi ; edi-- //we don't go form 'n' to '1' but from 'n-1' to '0'
lop1:
mov cl, [ecx] ; letter which will be a replacement
mov byte [eax], cl ; replace
test cl,cl ; was the replacement equal to 0?
je exit ; if yes that means the function is over
inc eax ; else increment pointer to letter which will be replaced
inc ecx ; increment pointer to letter which is a replacement
dec edi ; is it already nth number?
jne lop1 ; if not then repeat the loop
inc ecx ; else skip that letter by proceeding to the next one
jmp lopext ; we need to set counter (edi) once more
exit:
; epilog
pop ebp
ret
The problem is that when I'm calling this function from main() in C I get Segmentation fault (core dumped)
From what I know this is highly related to pointers, in this case I'm returning *char, and since I've seen some functions that returns int and they worked just fine, I suspect that I forgot about something important with returning a *char properly.
This is what my C file looks like:
#include <stdio.h>
extern char *remnth(char *s,int n);
int main()
{
char txt[] = "some example text\0";
printf("orginal = %s\n",txt);
printf("after = %s\n",remnth(txt,3));
return 0;
}
Any help will be appreciated.
You're using ecx as a pointer, and cl as a work register. Since cl is the low 8 bits of ecx, you're corrupting your pointer with the mov cl, [ecx] instruction. You'll need to change one or the other. Typically, al/ax/eax/rax is used for a temporary work register, as some accesses to the accumulator use shorter instruction sequences. If you use al as a work register, you'll want to avoid using eax as a pointer and use a different register instead (remembering to preserve its contents if necessary).
You need to load the return value into eax before the return. I assume you want to return a pointer to the beginning of the string, so that would be [ebp+8].

ASSEMBLY - output an array with 32 bit register vs 16 bit

I'm was working on some homework to print out an array as it's sorting some integers from an array. I have the code working fine, but decided to try using EAX instead of AL in my code and ran into errors. I can't figure out why that is. Is it possible to use EAX here at all?
; This program sorts an array of signed integers, using
; the Bubble sort algorithm. It invokes a procedure to
; print the elements of the array before, the bubble sort,
; once during each iteration of the loop, and once at the end.
INCLUDE Irvine32.inc
.data
myArray BYTE 5, 1, 4, 2, 8
;myArray DWORD 5, 1, 4, 2, 8
currentArray BYTE 'This is the value of array: ' ,0
startArray BYTE 'Starting array. ' ,0
finalArray BYTE 'Final array. ' ,0
space BYTE ' ',0 ; BYTE
.code
main PROC
MOV EAX,0 ; clearing registers, moving 0 into each, and initialize
MOV EBX,0 ; clearing registers, moving 0 into each, and initialize
MOV ECX,0 ; clearing registers, moving 0 into each, and initialize
MOV EDX,0 ; clearing registers, moving 0 into each, and initialize
PUSH EDX ; preserves the original edx register value for future writeString call
MOV EDX, OFFSET startArray ; load EDX with address of variable
CALL writeString ; print string
POP EDX ; return edx to previous stack
MOV ECX, lengthOf myArray ; load ECX with # of elements of array
DEC ECX ; decrement count by 1
L1:
PUSH ECX ; save outer loop count
MOV ESI, OFFSET myArray ; point to first value
L2:
MOV AL,[ESI] ; get array value
CMP [ESI+1], AL ; compare a pair of values
JGE L3 ; if [esi] <= [edi], don't exch
XCHG AL, [ESI+1] ; exchange the pair
MOV [ESI], AL
CALL printArray ; call printArray function
CALL crlf
L3:
INC ESI ; increment esi to the next value
LOOP L2 ; inner loop
POP ECX ; retrieve outer loop count
LOOP L1 ; else repeat outer loop
PUSH EDX ; preserves the original edx register value for future writeString call
MOV EDX, OFFSET finalArray ; load EDX with address of variable
CALL writeString ; print string
POP EDX ; return edx to previous stack
CALL printArray
L4 : ret
exit
main ENDP
printArray PROC uses ESI ECX
;myArray loop
MOV ESI, OFFSET myArray ; address of myArray
MOV ECX, LENGTHOF myArray ; loop counter (5 values within array)
PUSH EDX ; preserves the original edx register value for future writeString call
MOV EDX, OFFSET currentArray ; load EDX with address of variable
CALL writeString ; print string
POP EDX ; return edx to previous stack
L5 :
MOV AL, [ESI] ; add an integer into eax from array
CALL writeInt
PUSH EDX ; preserves the original edx register value for future writeString call
MOV EDX, OFFSET space
CALL writeString
POP EDX ; restores the original edx register value
ADD ESI, TYPE myArray ; point to next integer
LOOP L5 ; repeat until ECX = 0
CALL crlf
RET
printArray ENDP
END main
END printArray
; output:
;Starting array. This is the value of array: +1 +5 +4 +2 +8
;This is the value of array: +1 +4 +5 +2 +8
;This is the value of array: +1 +4 +2 +5 +8
;This is the value of array: +1 +2 +4 +5 +8
;Final array. This is the value of array: +1 +2 +4 +5 +8
As you can see the output sorts the array just fine from least to greatest. I was trying to see if I could move AL into EAX, but that gave me a bunch of errors. Is there a work around for this so I can use a 32 bit register and get the same output?
Using EAX is definitely possible, in fact you already are. You asked "I was trying to see if I could move AL into EAX, but that gave me a bunch of errors." Think about what that means. EAX is the extended AX register, and AL is the lower partition of AX. Take a look at this diagram:image of EAX register
. As you can see, moving AL into EAX using perhaps the MOVZX instruction would simply put the value in AL into EAX and fill zeroes in from right to left. You'd be moving AL into AL, and setting the rest of EAX to 0. You could actually move everything into EAX and run the program just the same and there'd be no difference because it's using the same part of memory.
Also, why are you pushing and popping EAX so much? The only reason to push/pop things from the runtime stack is to recover them later, but you never do that, so you can just let whatever is in EAX at the time just die.
If you still want to do an 8-bit store, you need to use an 8-bit register. (AL is an 8-bit register. IDK why you mention 16 in the title).
x86 has widening loads (movzx and movsx), but integer stores from a register operand always take a register the same width as the memory operand. i.e. the way to store the low byte of EAX is with mov [esi], al.
In printArray, you should use movzx eax, byte ptr [esi] to zero-extend into EAX. (Or movsx to sign-extend, if you want to treat your numbers as int8_t instead of uint8_t.) This avoids needing the upper 24 bits of EAX to be zeroed.
BTW, your code has a lot of unnecessary instructions. e.g.
MOV EAX,0 ; clearing registers, moving 0 into each, and initialize
totally pointless. You don't need to "init" or "declare" a register before using it for the first time, if your first usage is write-only. What you do with EDX is amusing:
MOV EDX,0 ; clearing registers, moving 0 into each, and initialize
PUSH EDX ; preserves the original edx register value for future writeString call
MOV EDX, OFFSET startArray ; load EDX with address of variable
CALL writeString ; print string
POP EDX ; return edx to previous stack
"Caller-saved" registers only have to be saved if you actually want the old value. I prefer the terms "call-preserved" and "call-clobbered". If writeString destroys its input register, then EDX holds an unknown value after the function returns, but that's fine. You didn't need the value anyway. (Actually I think Irvine32 functions at most destroy EAX.)
In this case, the previous instruction only zeroed the register (inefficiently). That whole block could be:
MOV EDX, OFFSET startArray ; load EDX with address of variable
CALL writeString ; print string
xor edx,edx ; edx = 0
Actually you should omit the xor-zeroing too, because you don't need it to be zeroed. You're not using it as counter in a loop or anything, all the other uses are write-only.
Also note that XCHG with memory has an implicit lock prefix, so it does the read-modify-write atomically (making it much slower than separate mov instructions to load and store).
You could load a pair of bytes using movzx eax, word ptr [esi] and use a branch to decide whether to rol ax, 8 to swap them or not. But store-forwarding stalls from byte stores forwarding to word loads isn't great either.
Anyway, this is getting way off topic from the title question, and this isn't codereview.SE.

Assembly Language code to find positive numbers in a Array of numbers

We can only edit bold part to find positive number from given array. This is what i have tried in visual basic and i am just getting result as zero, Can someone say where it got wrong?
int solution(const int arr[], size_t arr_size)
{
int result = 0;
__asm
{
**MOV eax, arr
MOV edx, eax
MOV ebx, 10
XOR ecx, ecx
LEA esi, size arr
NEXT2 :
MOV edi, esi
SHR edi, 10
JNC NEXT1
JMP NEXT3
NEXT1 : INC ecx
NEXT3 : INC SI
DEC ebx
JNZ NEXT2
MOV[result], ecx;**
}
return result;
}
int main()
{
int result;
int arr[] = { 0, -1, 2, -3, 4, -5, 6, -7, 8, -9 };
result = solution(arr, sizeof(arr) / sizeof(arr[0]));
printf("Grade 6 result = %d\n", result);
getchar();
return 0;
}
1) Here is a piece of code I use to get the size of an array:
mov ebx, 0 ; set ebx to zero to start checking at index 0
countloop:
inc ebx ;Increase ebx for next loop
cmp arr[ebx], '\0' ;compare arr at index [ebx] to end char '\0'
jne countloop ;if not equal, jump back and try for next index
mov arrlength, ebx ;if equal to '\0', load the value of ebx (actual length of the array) into the empty length variable
The reason why you look for '\0' is that the string is stored like a char array and the register only stores the first char, the loops to get the other until it get that 'end' characters. I believe other characters would get the loop to stop, not sure which one, but \0 does work
2) Then use the value stored in arrlength as the number of loops you will need to check your array and find positives.
MOV ecx, arrlength ; This sets the loop counter register (ECX) to the size of your array
MOV ebx, 0 ; Set this to 0 as we will use it again as index
MOV esi, 0 ; Same
compareLoop:
MOV eax, arr[ebx] ; load value of arr at index ebx
CMP eax, 0 ; sets flag, comparing eax to 0
JL lessThan ;JL --> jump if first operand lower than second operand
MOV newArr[esi], eax ;Put the value (which is >0, in a new array)
add esi, 4 ; To point to the next position
lessThan:
add ebx, 4 ; adding for to ebx so that now it has the index of next value in array
loop compareLoop ; until ecx = 0
I basically showed you how I would do it, I am far from a pro, and simply don't understand the way you proceeded.

MASM Why doesn't decrementing a register find the next value in an array?

I'm testing to see if an entered string is a palindrome by taking the string, moving it into a character array and comparing first and last elements of the char array to each other to test. I can get the first element of the array to find the second character easily, but to find the last acceptable value and decrement that, it doesn't find the next character in the array. So if the corrected/cleaned char array looks like:
['A']['B']['C']['D']['A']
ebx will go from 'A' -> 'B' but edi will not change from 'A' -> 'D'
Why will ebx change characters but edi only subtracts 1 from it's register value? What can I do to have edi change character value? Thanks!
C++ code: (just in case)
#include <iostream>
#include <cstring>
#include <sstream>
using namespace std;
extern"C"
{
char stringClean(char *, int);
char isPalindrome(char *, int);
}
int main()
{
int pal = 0;
const int SIZE = 30;
string candidate = "";
char strArray[SIZE] = { '\0' };
cout << "enter a string to be tested: ";
getline(cin, candidate);
int j = 0;
for (int i = 0; i < candidate.length(); i++) //getting rid of garbage before entering into array
{
if (candidate[i] <= 'Z' && candidate[i] >= 'A' || candidate[i] <= 'z' && candidate[i] >= 'a')
{
strArray[j] = candidate[i];
j++;
}
}
if (int pleaseWork = stringClean(strArray, SIZE) == 0)
pal = isPalindrome(strArray, SIZE);
if (pal == 1)
cout << "Your string is a palindrome!" << endl;
else
cout << "Your string is NOT a palindrome!" << endl;
system("pause");
return 0;
}
masm code:
.686
.model flat
.code
_isPalindrome PROC ; named _test because C automatically prepends an underscode, it is needed to interoperate
push ebp
mov ebp,esp ; stack pointer to ebp
mov ebx,[ebp+8] ; address of first array element
mov ecx,[ebp+12] ; number of elements in array
mov ebp,0
mov edx,0
mov eax,0
push edi ;save this
push ebx ;save this
mov edi, ebx ;make a copy of first element in array
add edi, 29 ;move SIZE-1 (30 - 1 = 29) elements down to, HOPEFULLY, the last element in array
mov bl, [ebx]
mov dl, [edi]
cmp dl, 0 ;checks if last element is null
je nextElement ;if null, find next
jne Comparison ;else, start comparing at Comparison:
nextElement:
dec edi ;finds next element
mov dl, [edi] ;move next element into lower edx
cmp dl, 0 ;checks if new element is mull
je nextElement ;if null, find next
jne Comparison ;else, start comparing at Comparison:
Comparison:
cmp bl,dl ;compares first element and last REAL element
je testNext ;jump to textNext: for further testing
mov eax,1 ;returns 1 (false) because the test failed
jne allDone ;jump to allDoneNo because it's not a palindrome
testNext:
dec edi ;finds last good element -1 --------THIS ISN'T DOING the right thing
inc ebx ;finds second element
cmp ebx, edi ;checks if elements are equal because that has tested all elements
je allDone
;mov bl,[ebx] ;move incremented ebx into bl
;mov dl,[edi] ;move decremented edi into dl
jmp Comparison ;compare newly acquired elements
allDone:
xor eax, eax
mov ebp, eax
pop edi
pop edx
pop ebp
ret
_isPalindrome ENDP
END
I haven't tested your code, but looking at it I noticed some possible problems.
Why will ebx change characters
It seems that way, but it's not what you tried to reach. You commented out the lines reading the characters from memory/the array after the initial phase (see below). So in fact, you did change the character in EBX, but not the way you expected (and supposedly wanted). With INC EBX you increased the char-value from 'A'(65dec) to 'B'(66dec). That 'B' is also the second char of the string is merely a coincidence. Try changing the string from ABCDA to ARRCD or something and you'd still get a 'B' on the second round. So EBX does indeed change.
...
;mov bl,[ebx] ;move incremented ebx into bl
;mov dl,[edi] ;move decremented edi into dl
jmp Comparison ;compare newly acquired elements
...
but edi only subtracts 1 from it's register value?
What can I do to have edi change character value?
Yes. That's what your code does and it's correct. Uncomment the above line containing [edi] and the char pointed at by EDI will be loaded into the lower byte of EDX = DL.
The problem with your code is that you are using EBX both as a pointer and (char)value. Loading the next char into EBX will destroy the pointer and your programm is likely to crash with ACCESS_VIOLATION in the next iteration or show random behaviour which would be hard to debug.
Separate pointers from values like you have done with EDI/EDX (EDI=pointer to char, EDX(DL)=char value.
Another problem is: your code will only work for strings with an odd length.
testNext:
dec edi ; !!!
inc ebx ; !!!
cmp ebx, edi ; checks if elements are equal because that has tested all elements
je allDone
So you are increasing and decreasing the (should be) pointers and then comparing them. Now consider this case of an even-length-string:
ABBA
^ ^ (EBX(first) and EDI(second))
=> dec both =>
ABBA
^^ (EBX(first) and EDI(second))
=> dec both =>
ABBA
^^ (EDI(first) and EBX(second))
=> dec both =>
ABBA
^ ^ (EDI(first) and EBX(second))
=> dec both =>
ABBA
^ ^ (EDI(first) and EBX(second))
...
=> Problem! Won't terminate, condition EBX=EDI will never be met*
Possible solution: Add an A(Above = Greater for unsigned values) to the jump
...
cmp ebx, edi
jae allDone

How do I put a register into an array index in MASM?

I'm having a really hard time with arrays in MASM. I don't understand how to put the value of a register into an index of an array. I can't seem to find where arr[i] is. What is it I'm missing or what do I have wrong?
Thanks for your time!
C++ code:
#include <iostream>
using namespace std;
extern"C"
{
char intToBinary(char *, int, int);
}
int main()
{
const int SIZE = 16;
char arr[SIZE] = { '/0' };
cout << "What integer do you want converted?" << endl;
cin >> decimal;
char value = intToBinary(arr, SIZE, decimal);
return 0;
}
Assembly code:
.686
.model flat
.code
_intToBinary PROC ; named _test because C automatically prepends an underscode, it is needed to interoperate
push ebp
mov ebp,esp ; stack pointer to ebp
mov ebx,[ebp+8] ; address of first array element
mov ecx,[ebp+12] ; number of elements in array
mov edx, 0 ;has to be 0 to check remainder
mov esi, 2 ;the new divisor
mov edi, 12
LoopMe:
add ebx, 4
xor edx, edx ;keep this 0 at all divisions
div esi ;divide eax by 2
inc ebx ;increment by 1
mov [ebp + edi], edx ;put edx into the next array index
add edi, 4 ;add 4 bytes to find next index
cmp ecx, ebx ;compare iterator to number of elements (16)
jg LoopMe
pop ebp ;return
ret
_intToBinary ENDP
END
In your C++ code
decimal is not defined.
'/0' is invalid character literal. Use \, not /, to write escape sequences in C++.
value isn't used.
Your code should be like this:
#include <iostream>
using namespace std;
extern"C"
{
char intToBinary(char *, int, int);
}
int main()
{
const int SIZE = 16;
char arr[SIZE] = { '\0' };
int decimal;
cout << "What integer do you want converted?" << endl;
cin >> decimal;
intToBinary(arr, SIZE, decimal);
for (int i = SIZE - 1; i >= 0; i--) cout << arr[i];
cout << endl;
return 0;
}
In your assembly code
You stored the "address of first array element" to ebx by mov ebx,[ebp+8], so the address of arr will be there.
Unfortunately, it is destroyed by add ebx, 4 and inc ebx.
"put edx into the next array index" No, [ebp + edi] isn't the next array index and it is destoying data on the stack. It is very bad.
Don't add 4 bytes to "find next index" if your size of char is 1 byte.
Your code should be like this (Sorry, this is nasm code because I am unfamiliar to masm):
bits 32
global _intToBinary
_intToBinary:
push ebp
mov ebp, esp ; stack pointer to ebp
push esi ; save this register before breaking in the code
push edi ; save this, too
push ebx ; save this, too
mov ebx, [ebp + 8] ; address of first array element
mov ecx, [ebp + 12] ; number of elements in array
mov eax, [ebp + 16] ; the number to convert
xor edi, edi ; the index of array to store
mov esi, 2 ; the new divisor
LoopMe:
xor edx, edx ; keep this 0 at all divisions
div esi ; divide eax by 2
add dl, 48 ; convert the number in dl to a character representing it
mov [ebx + edi], dl ; put dl into the next array index
inc edi ; add 1 byte to find next index
cmp ecx, edi ; compare iterator to number of elements
jg LoopMe
xor eax, eax ; return 0
pop ebx ; restore the saved register
pop edi ; restore this, too
pop esi ; restore this, too
mov esp, ebp ; restore stack pointer
pop ebp
ret
Note that this code will store the binary text in reversed order, so I wrote the C++ code to print them from back to front.
Also note that there are no terminating null character in arr, so do not do cout << arr;.
You have the address of the first array element in ebx, and edi is your loop counter. So mov [ebx + edi], edx would store edx into arr[edi].
Also note that your loop condition is wrong (your cmp is comparing the number of elements against the starting address of the array.)
Avoid div whenever possible. To divide by two, right-shift by one. div is very slow (like 10 to 30 times slower than a shift).
BTW, since you have a choice of which registers to use (out of the ones the ABI says you're allowed to clobber without saving/restoring), edi is used for a "destination" pointer by convention (i.e. when it doesn't cost any extra instructions), while esi is used as a "source" pointer.
Speaking of the ABI, you need to save/restore ebx in functions that use it, same as ebp. It keeps its value across function calls (because any ABI-compliant function you call preserves it). I forget which other registers are callee-saved in the 32bit ABI. You can check at the helpful links in https://stackoverflow.com/tags/x86/info. 32bit is obsolete; 64bit has a more efficient ABI, and includes SSE2 as part of the baseline.

Resources