The assembly functions with commented c version:
/*
int f (int x)
{
return x+2;
}
void map2 (int* um, int * outro, int n)
{
int i;
for (i=0; i<n; i++)
*(outro+i) = f(*(um+i));
}
*/
.text
.globl f
f:
/********************************** prologue *************************************/
pushl %ebp
movl %esp, %ebp
pushl %ebx
/********************************************************************************/
movl 8(%ebp), %eax /* eax receives x value */
addl $2, %eax /* return x+2; */
/************************************** end *************************************/
popl %edi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
/*********************************************************************************/
.globl map2
map2:
/********************************** prologue *************************************/
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %esi
pushl %edi
/********************************************************************************/
movl $0, %ebx /* i = 0; INIT */
L1: cmpl 16(%ebp), %ebx /* if (!(i<n)) */
jge out
movl 12(%ebp), %esi /* esi receives 'outro' (another) address */
**movl %ebx, %ecx /* moves ebx value for bytes multiplication */
imul $4, %ecx /* 4(int) * i bytes to course */**
addl %ecx, %esi /* esi points to outro+i */
movl 8(%ebp), %edi /* edi receives 'um' (one) address */
**movl %ebx, %edx /* moves ebx value for bytes multiplication */
imul $4, %edx /* 4(int) * i bytes to course */**
addl %edx, %edi /* edi points to um+i */
/************************ calls f and return it's value *************************/
pushl %ecx
pushl %edx
pushl %eax
pushl (%edi) /* push *(um+i) for 'f' usage */
call f
movl %eax, (%esi) /* *(outro+i) = f(*(um+i)); */
addl $4, %esp /* clears *(um+i) from stack */
popl %eax
popl %edx
popl %ecx
/********************************************************************************/
incl %ebx /* i++; */
jmp L1 /* end loop */
out:
/************************************** end *************************************/
popl %edi
popl %esi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
/********************************************************************************/
The main C Code:
#include <stdio.h>
#define N 10
int f (int x);
void map2 (int* um, int * outro, int n);
int main (void) {
int i;
int a[N], b[N];
for (i=0;i<N;i++)
{
a[i] = i;
printf("b[%d] = %d\n", i, f(i)); // added for debug purposes
}
map2(a,b,N);
printf("\n"); // added for clear sight
for (i=0;i<N;i++)
printf("b[%d] = %d\n", i, b[i]);
return 1;
}
output:
b[0] = 2
b[1] = 3
b[2] = 4
b[3] = 5
b[4] = 6
b[5] = 7
b[6] = 8
b[7] = 9
b[8] = 10
b[9] = 11
b[0] = 33686018
b[1] = 33686019
b[2] = 516
b[3] = -253
b[4] = -145333866
b[5] = -143814668
b[6] = -145333723
b[7] = -143596928
b[8] = 0
b[9] = 134513961
<seg fault>
Pretty clear that the 'f' function is ok, however 'map2' has some problem. From b[4] to b[7] getting memory junk. What's wrong here?
My guess is something about popl %edi popl %esi at the end of the code, since they're holding the two arrays addresses. Even so, changes there didn't fix the problem.
Related
This question was proposed to me by a friend and I have no idea how to solve it.
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
And is supposed to map to this loop in C,
int loop(int a, int b){
int x, y;
y = ____;
for (____; ____; ____){
____;
}
return ____;
}
My attempt at converting the assembly to C,
y = 5a;
y = b + 2y;
x = 4y;
if (x < b){
x = 3a;
do{
y += x;
} while (b <= -2);
}
return y;
I assumed %eax = y, since 'y' in the code to fill is the first variable being assigned.
'x' follows as %edx since it's another assignment, and so should be at least part of the "Initialisation" of the for loop.
However this doesn't seem to fix into the blanks provided, so I am really stuck.
I think I've got a really close, if not perfect solution:
/* rdi = a, rsi = b */
/* rax = y, rdx = x */
/*
loop:
leal (%rdi, %rdi, 4), %eax
leal (%rsi, %rax, 2), %eax
leal 0(, %rax, 4), %edx
cmpl %edx, %esi
jge .L1
leal (%rdi, %rdi, 2), %edx
.L3:
addl %edx, %eax
cmpl $-2, %esi
jl .L3
.L1:
rep ret
*/
int loop(int a, int b){
int x, y;
y = b + (a * 5) * 2;
for (x = y * 4; x > b;){
do y += (x = a * 3); while(b < -2);
break;
}
return y;
}
Not sure if break; is an issue but I can't find a better way.
I am rephrasing this question based on the comments received.
I have a loop that runs 30 Billion times and assigns values to a chunk of memory assigned using malloc();
When the loop contains a condition it runs much slower than when the condition is not present. Review the scenarios below:
Scenario A: Condition is present and program is slow (43 sec)
Scenario B: Condition is not present and program is much faster (4 sec)
// gcc -O3 -c block.c && gcc -o block block.o
#include <stdio.h>
#include <stdlib.h>
#define LEN 3000000000
int main (int argc, char** argv){
long i,j;
unsigned char *n = NULL;
unsigned char *m = NULL;
m = (unsigned char *) malloc (sizeof(char) * LEN);
n = m;
srand ((unsigned) time(NULL));
int t = (unsigned) time(NULL);
for (j = 0; j < 10; j++){
n = m;
for (i = 0; i < LEN; i++){
//////////// A: THIS IS SLOW
/*
if (i % 2){
*n = 1;
} else {
*n = 0;
}
*/
/////////// END OF A
/////////// B: THIS IS FAST
*n = 0;
i % 2;
*n = 1;
/////////// END OF B
n += 1;
}
}
printf("Done. %d sec \n", ((unsigned) time(NULL)) - t );
free(m);
return 0;
}
Regards,
KD
You can use gcc -S -O3 to have a look at the resulting assembler.
Here is an example on an Intel box:
Fast version:
movl %eax, %r12d
.p2align 4,,10
.p2align 3
.L2:
movl $3000000000, %edx
movl $1, %esi
movq %rbp, %rdi
call memset
subq $1, %rbx
jne .L2
Slow version:
movl $10, %edi
movl %eax, %ebp
movl $3000000000, %esi
.p2align 4,,10
.p2align 3
.L2:
xorl %edx, %edx
.p2align 4,,10
.p2align 3
.L5:
movq %rdx, %rcx
andl $1, %ecx
movb %cl, (%rbx,%rdx)
addq $1, %rdx
cmpq %rsi, %rdx
jne .L5
subq $1, %rdi
jne .L2
Conclusion: the compiler is smarter than you think. It is able to optimize the inner loop as a memset (which is faster because it uses SSE/AVX or REP instructions on Intel). However, this optimization cannot kick in if the condition is kept - because the result is different.
The assembly function with commented c version:
/*
struct X
{
int c; // 4 bytes
struct X *next; // 4 bytes
};
int add2 (struct X *x)
{
if (x == NULL) return 0;
else return x->c + add2(x->next);
}
*/
.text
.globl add2
add2:
/********************************** prologue *************************************/
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %esi
/********************************************************************************/
movl 8(%ebp), %ebx
cmpl $0, %ebx
jne out
movl $0, %eax
jmp end
out:
/***************************** calculates in x->next *******************************/
pushl %ecx
pushl %edx
pushl %eax
movl 4(%ebx), %esi
pushl %esi
call add2
addl $4, %esp
popl %eax
popl %edx
popl %ecx
/********************************************************************************/
cmpl $0, (%ebx) /* > negative values */
js neg /* treatment < */
addl (%ebx), %eax /* return x->c + add2(x->next); */
neg:negl (%ebx) /* c = |c| */
subl (%ebx), %eax /* return x->(-)c + add2(x->next); */
end:
/****************************************end *************************************/
popl %esi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
/*********************************************************************************/
The main c code:
#include <stdio.h>
#include <stdlib.h>
struct X
{
int c;
struct X * next;
};
typedef struct X Xlist;
Xlist * lst_create (void)
{
return NULL;
}
Xlist * lst_insert (Xlist * l, int c)
{
Xlist * new = (Xlist*) malloc(sizeof(Xlist));
new->c = c;
new->next = l;
return new;
}
int add2 (struct X * x);
int main (void)
{
// int i;
Xlist * l;
l = lst_create();
//for (i=-9;i<10;i++)
l = lst_insert(l, -1);
printf("%d\n", add2(l));
return 0;
}
The intention is to print the sum of the elements of a linked list.
I'm getting memory garbage when using negative values. I believe the error is somehow here:
neg:negl (%ebx) /* c = |c| */
subl (%ebx), %eax /* return x->(-)c + add2(x->next); */
But why?
Already used the same algorithm in other add function and it was ok.
It seems to me that a big problem is that your recursive call to add2() ignores the return value:
pushl %eax
movl 4(%ebx), %esi
pushl %esi
call add2
addl $4, %esp
popl %eax ; <-- overwrites what the add2 call returned
Also, your C equivalent code doesn't seem to be really be equivalent. The assembly version modifies the negative values in the list to be positive; that isn't reflected in your C code version.
I was practicing some assembly code to C and need some help with two questions. Based on the GCC objdump it seems okay but I want to make sure I can do this WITHOUT a computer (still kind of new to assembly code)
Question 1 :
q1:
pushl %ebp
movl %esp, %ebp
subl $4, %esp
cmpl $0, 8(%ebp)\\ compare variable1 to zero
jle .L2 \\jump if less than or equal to zero
movl $1, -4(%ebp)\\ ?? variable2 = 1??
jmp .L4\\else
.L2:
movl $0, -4(%ebp)\\ variable2 = 0
.L4:
movl -4(%ebp), %eax\\ variable2 = variable1
leave
ret
what I got was
int main(int x, int z)
{
if (x < 0)
z = 0;
else
z = x;
}
But I was not sure what the purpose of movl $1, -4(%ebp) was.
Question 2 :
fn:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
movl 8(%ebp), %edx
cmpl $1, %edx\\ compare variable1 to 1
jle .L4\\ less than or equal jump.
.L5:
imull %edx, %eax\\ multiply variable1 by variable 2
subl $1, %edx\\ variable1 -1
cmpl $1, %edx\\ compare variable1 with 1
jne .L5 Loop if not equal
.L4:
popl %ebp\\ return value
ret
How I interpreted the information
int main(int x)
{
int result;
if (x <= 1){
for (result=1; x != 1; x = x-1)
result *= x;}
else{return result;}
}
Not sure if my logic is correct on either of those.
Q1 you have one argument 8(%ebp) and one local variable at -4(%ebp). Return value will be in %eax. Knowing this, the function looks more like:
int foo(int arg)
{
int local;
if (arg <= 0) {
local = 0;
} else {
local = 1;
}
return local;
}
Q2 popl %ebp // return value that's not the return value, that's restoring the saved %ebp of the caller (that was pushed in the beginning). Also, the condition in the loop should use > not !=. You are missing an if (x > 1) conditional around the for loop. (Thanks to Mooing Duck for pointing this out.) Also, technically it's a do-while loop. Otherwise you got this function right.
int factorial(int x)
{
int result = 1;
if (x > 1) {
do {
result *= x;
x -= 1;
} while(x != 1);
}
return result;
}
I'm really confused by my homework assignment. We are given C code and then assembly which are listed below. This is x86 assembly. Any assistance would be greatly appreciated. I've made an attempt to solve it based on what I'm understanding.
C code:
void transpose(Marray_t A) {
int i, j;
for (i = 0; i < M; i++)
for (j = 0; j < i; j++) {
int t = A[i][j];
A[i][j] = A[j][i];
A[j][i] = t;
}
}
Assembly code for ONLY inner loop:
1 .L3:
2 movl (%ebx), %eax //is this getting the mem location of %ebx and setting to %eax?
3 movl (%esi,%ecx,4), %edx //ecx * 4 + esi into edx
4 movl %eax, (%esi,%ecx,4) //
5 addl $1, %ecx //add 1 to ecx
6 movl %edx, (%ebx) //move edx to mem location of ebx???
7 addl $52, %ebx //I think this is M but I could be wrong
8 cmpl %edi, %ecx //compare edi & ecx
9 jl .L3
here is what I have to answer:
A. What is the value of M? ...I think this is 52...?
B. What registers hold program values i and j? ... I think edx and eax?
C. Write a C code version of transpose that makes use of the optimizations
that occur in this loop. Use the parameter M in your code rather than numeric
constants.
Attempt at (C):
void tranpose(Marray_t A) {
int i, j;
for(i = 0; i < M; i++) {
for(j = 0; j < i; j++) {
int *row = &A[i][0];
int *col = &A[0][j];
int value = (*row * 4) + *col;
}
}
}
1 .L3:
2 movl (%ebx), %eax // eax := read memory word at ebx
3 movl (%esi,%ecx,4), %edx // edx := read memory word at esi + 4*ecx
4 movl %eax, (%esi,%ecx,4) // store eax into that location
5 addl $1, %ecx // add 1 to ecx
6 movl %edx, (%ebx) // store edx into memory at ebx
7 addl $52, %ebx // add 52 to ebx
8 cmpl %edi, %ecx // compare edi & ecx
9 jl .L3
So in this code. %ebx is the the address of A[j][i], %esi is the address of A[i] and %ecx is j. 52 is sizeof(A[j]), so M is probably 13 (as the array element size is 4)