Implementing atomic CAS in gcc inline - c

I've been trying to implement a gcc inline function (AT&T assembly) that will perform an atomic CAS operation, but I can't get it to work - the return value is always getting messed up.
I've tried 2 different approaches, each seems to have its own misbehaviours:
1.
/*
 * Attempt 1: compare-and-swap on the 32-bit value at addr using LOCK CMPXCHG.
 *
 * The exchange compares EAX (expected) with *addr and, when they match,
 * stores newval there. Afterwards EFLAGS is copied into EBX through the
 * stack, bit 0x40 (the zero flag) is isolated, and result is overwritten
 * with 0 when that bit is clear. result is initialised to 1 in C.
 *
 * NOTE(review): EBX is bound to addr as an input ("b") yet is overwritten by
 * popl/andl without being listed as an output or clobber; result is declared
 * write-only ("=m") although the success path relies on its initial value 1;
 * EAX, which cmpxchg rewrites on a failed compare, is declared input-only.
 * Any of these can plausibly explain the corrupted return value - confirm.
 */
static inline int
cas(volatile void * addr, int expected, int newval)
{
int result = 1;
asm volatile("lock; cmpxchgl %3, (%2)\n\t"
"pushfl\n\t"                 /* push EFLAGS ... */
"popl %%ebx\n\t"             /* ... and pop it into EBX */
"andl $0x40, %%ebx\n\t"      /* keep only the zero-flag bit */
"cmpl $0x0, %%ebx\n\t"
"jnz res%=\n\t"              /* ZF was set: skip the store of 0 */
"movl $0, %0\n\t"
"res%=:\n\t"                 /* %= gives each asm instance a unique label */
: "=m"(result)
: "a"(expected), "b"(addr), "r"(newval)
: "memory");
return result;
}
2.
/*
 * Attempt 2: compare-and-swap on the 32-bit value at addr using LOCK CMPXCHG,
 * branching directly on the zero flag that cmpxchg sets.
 *
 * ret is initialised to 1 in C; the asm stores 0 into it only when the
 * jz is not taken (compare failed).
 *
 * NOTE(review): ret is declared write-only ("=m"), so the compiler is
 * presumably free to drop the "ret = 1" initialisation that the success path
 * depends on ("+m" would keep it); EAX is rewritten by cmpxchg on failure but
 * is declared input-only - confirm against the GCC extended-asm rules.
 */
static inline int cas(volatile void * addr, int expected, int newval) {
int ret = 1;
asm volatile("lock; cmpxchgl %3, (%2)\n\t"
"jz cas_success%=\n\t"       /* ZF set: exchange happened, leave ret alone */
"movl $0, %0\n\t"
"cas_success%=:\n\t"         /* %= gives each asm instance a unique label */
: "=m"(ret)
: "a"(expected), "b"(addr), "r"(newval)
: "memory");
return ret;
}
But neither works. Could anyone point out the problem with either of the implementations?
Thanks

Related

how to use rdtscp correctly?

According to 《How to Benchmark Code Execution Times on Intel® IA-32 and IA-64 Instruction Set
Architectures》, I use the code below:
/*
 * Reads the time-stamp counter at the start of a measured region.
 *
 * Executes CPUID followed by RDTSCP, copies EDX and EAX into two
 * compiler-chosen registers, and returns them combined as
 * (high << 32) | low. RAX, RBX, RCX and RDX are declared as clobbered.
 */
static inline uint64_t bench_start(void)
{
unsigned cycles_low, cycles_high;
asm volatile("CPUID\n\t"
"RDTSCP\n\t"
"mov %%edx, %0\n\t"          /* upper 32 bits of the counter */
"mov %%eax, %1\n\t"          /* lower 32 bits of the counter */
: "=r" (cycles_high), "=r" (cycles_low)
::"%rax", "%rbx", "%rcx", "%rdx");
return (uint64_t) cycles_high << 32 | cycles_low;
}
/*
 * Reads the time-stamp counter at the end of a measured region.
 *
 * Executes RDTSCP, copies EDX and EAX into two compiler-chosen registers,
 * and only then executes CPUID. Returns the two halves combined as
 * (high << 32) | low. RAX, RBX, RCX and RDX are declared as clobbered.
 */
static inline uint64_t bench_end(void)
{
unsigned cycles_low, cycles_high;
asm volatile("RDTSCP\n\t"
"mov %%edx, %0\n\t"          /* save the upper half before CPUID overwrites EDX */
"mov %%eax, %1\n\t"          /* save the lower half before CPUID overwrites EAX */
"CPUID\n\t"
: "=r" (cycles_high), "=r" (cycles_low)
::"%rax", "%rbx", "%rcx", "%rdx");
return (uint64_t) cycles_high << 32 | cycles_low;
}
But I have also seen people use the code below:
/*
 * Shorter variant: executes RDTSCP alone and lets the output constraints
 * bind EDX ("=d") and EAX ("=a") straight to cycles_high and cycles_low.
 * Returns (high << 32) | low.
 *
 * NOTE(review): asm_volatile is not defined in this snippet - presumably a
 * macro for "asm volatile" or a typo; RDTSCP also writes ECX, which is not
 * listed as a clobber here - confirm.
 */
static inline uint64_t bench_start(void)
{
unsigned cycles_low, cycles_high;
asm_volatile("RDTSCP\n\t"
: "=d" (cycles_high), "=a" (cycles_low));
return (uint64_t) cycles_high << 32 | cycles_low;
}
/*
 * Same body as the RDTSCP-only reader: executes RDTSCP and returns
 * (EDX << 32) | EAX via the "=d" / "=a" output constraints.
 *
 * NOTE(review): this repeats the name bench_start; presumably bench_end was
 * intended, mirroring the CPUID-based pair - confirm. asm_volatile is not
 * defined in this snippet, and ECX (written by RDTSCP) is not clobbered.
 */
static inline uint64_t bench_start(void)
{
unsigned cycles_low, cycles_high;
asm_volatile("RDTSCP\n\t"
: "=d" (cycles_high), "=a" (cycles_low));
return (uint64_t) cycles_high << 32 | cycles_low;
}
As you know, RDTSCP is only pseudo-serializing, so why do some people use the second version? I can think of two possible reasons:
Maybe in most situations RDTSCP is enough to ensure "in-order execution"?
Maybe they just want to avoid CPUID for efficiency?

Insert a string into register inline ASM with GCC

I'm working on a code that checks if the program is running under VM or VPC in C and I have some errors in the inline ASM at compile-time.
This is my code so far:
// Tries to detect Virtual PC by emitting the byte sequence 0F 3F 07 0B
// between two basic asm statements. Before the bytes, EBX is saved and
// zeroed and EAX is set to 1; afterwards EBX is tested and restored.
// Returns rc, which starts as false.
//
// NOTE(review): these are basic asm statements with no operand lists, so
// "$rc" is not bound to the C variable rc, and an immediate cannot be a
// mov destination - this line is a likely source of the compile errors.
// The result of "test" is also never branched on. Splitting the sequence
// across several asm statements does not guarantee the compiler keeps
// registers or the stack untouched in between - confirm.
bool IsInsideVPC()
{
bool rc = false;
try
{
__asm__(
"push %ebx\n\t"
"movl $0, %ebx\n\t" // Flag
"movl $1, %eax"); // VPC function number
// call VPC
asm __volatile__ (".byte 0x0F");
asm __volatile__ (".byte 0x3F");
asm __volatile__ (".byte 0x07");
asm __volatile__ (".byte 0x0B");
__asm__(
"test %ebx, %ebx\n\t"
"movl $1, $rc\n\t" // Flag
"pop %ebx");
}
catch (...)
{
// The except block shouldn't get triggered if VPC is running!!
}
return rc;
}
// Tries to detect VMware by loading 'VMXh' into EAX, 0 into EBX, 10 into
// ECX and 'VX' into EDX, executing an "in" instruction, and comparing EBX
// with 'VMXh'. EDX, ECX and EBX are saved and restored around the
// sequence. Returns rc, which starts as false and is reset to false in the
// catch handler.
//
// NOTE(review): the quoted multi-character constants are carried over from
// the original (non-GCC) syntax; GNU as presumably needs numeric immediates
// instead (e.g. $0x564D5868 for 'VMXh', $0x5658 for 'VX') - verify.
// "$rc" is not bound to the C variable rc in a basic asm statement, the
// "in" operands look reversed for AT&T order (port first, then
// accumulator), and the cmp result is never branched on - confirm.
bool IsInsideVMWare()
{
bool rc = false;
try
{
__asm__(
"push %edx\n\t"
"push %ecx\n\t" // Flag
"push %ebx\n\t"
"movl 'VMXh', %eax\n\t"
"movl $0, %ebx\n\t"
"movl $10, %ecx\n\t"
"movl 'VX', %edx\n\t"
"in %eax, %dx\n\t"
"cmp %ebx, 'VMXh'\n\t"
"movl $1, $rc\n\t"
"pop %ebx\n\t"
"pop %ecx\n\t"
"pop %edx");
}
catch (...)
{
rc = false;
}
return rc;
}
Of course, after all this there is the main function that runs everything, checks the results and outputs an answer.
I didn't write the original code, but I did change it to the GCC syntax.
I'm getting errors that I think are related to the way I insert a string into a register.

Inline assembly in C not working properly

I'm trying to learn how to use inline assembly in C code.
I have created a small program that should add two integers:
/*
 * Question code: intended to add a and b through R8D/R9D and print the sum.
 *
 * Operand %0 is a (output, "=r") and operand %1 is b (input, "r").
 * The asm copies %0 to R8D and %1 to R9D, adds R8D into R9D, and writes
 * R9D back to %1. R8 and R9 are declared as clobbered.
 *
 * NOTE(review): the final store targets %1 (b's register), not %0, and
 * a is declared write-only, so its initial value 1 is not guaranteed to be
 * in %0 when the asm starts.
 */
int main(){
int a=1;
int b=2;
asm( "movl %0, %%r8d;"
"movl %1, %%r9d;"
"addl %%r8d, %%r9d;"
"movl %%r9d, %1;"
: "=r" (a)
: "r" (b)
:"%r8","%r9" );
printf("a=%d\n",a);
return 0;
}
The aim was to load a and b into the registers %r8 and %r9, add them, and then put the output back in a.
However, this program prints a=2 instead of a=3. I'm not sure if the problem is in the inline technique or in the assembly itself.
There are two issues here:
First: The "=r" constraint you use for the output operand a indicates to the compiler that the operand is write-only — it is allowed to assume that the initial value is not needed. This is definitely not the case for your code! Change the qualifier to "+r" to let the compiler know that the initial value is important.
Second: You are moving the result to the wrong register! The target %1 of the last movl is the register corresponding to b, not a. You want %0.
Fixed:
/*
 * Corrected statement: a is a read-write operand ("+r"), so its initial
 * value is available in %0, and the sum in R9D is stored back to %0 (a).
 */
asm(
"movl %0, %%r8d;"       /* R8D = a */
"movl %1, %%r9d;"       /* R9D = b */
"addl %%r8d, %%r9d;"    /* R9D = a + b */
"movl %%r9d, %0;"       /* a = R9D */
: "+r" (a)
: "r" (b)
: "%r8", "%r9"
);

Calling printf from inline ASM (X64)

I have this code:
#include <stdio.h>
#include <stdint.h>
/*
 * Question code: calls printf(fmt, s) from inline asm, with one asm
 * statement for 32-bit builds and one for 64-bit builds, selected by
 * __i386__ / __x86_64__. Both variants push s and then fmt on the stack,
 * load 2 into the accumulator, and call printf. Returns ret.
 *
 * NOTE(review): in AT&T order "mov %0, %eax/%rax" copies operand 0 INTO the
 * accumulator, so ret is never written by the asm; the pushed arguments are
 * never popped, and no clobbers are declared even though a function call
 * may overwrite caller-saved registers, flags and memory - confirm.
 */
int main(void){
char *fmt = "%s";
char *s = "Hello world!\n";
//gcc -m32 test.c
#ifdef __i386__
int32_t ret;
__asm__ __volatile__ (
"push %1\n\t"            /* second printf argument: s */
"push %2\n\t"            /* first printf argument: fmt */
"movl $2, %%eax\n\t"
"call printf\n\t"
"movl %0, %%eax"
: "=r" (ret)
: "r" (s), "r" (fmt)
:
);
#endif
//gcc -m64 test.c
#ifdef __x86_64__
int64_t ret;
__asm__ __volatile__ (
"push %1\n\t"            /* pushes s */
"push %2\n\t"            /* pushes fmt */
"movq $2, %%rax\n\t"
"call printf\n\t"
"movq %0, %%rax"
: "=r" (ret)
: "r" (s), "r" (fmt)
:
);
#endif
return ret;
}
The x86 version works as expected, but the x64 version segfaults. Why is it segfault-ing?
The 64-bit ABI uses registers (RDI, RSI, RDX, RCX, R8 and R9) instead of the stack for argument passing. So the code should be:
movq %2,%%rdi          # 1st argument (fmt); 64-bit registers need the q suffix
movq %1,%%rsi          # 2nd argument (s)
xorl %%eax,%%eax       # variadic call: AL = number of vector registers used (none)
call printf
movq %%rax,%0          # AT&T order is source, destination: store the return value
I think this is related to the 64-bit ABI. You can find some information in that SO question.

How to add a counter in gcc asm?

In the Linux kernel code, when a spinlock is already locked, the spin_lock function will spin. The code of spin_lock is below:
/*
 * Acquires a ticket spinlock by spinning until the two 16-bit halves of
 * the lock word match for this caller.
 *
 * xaddl atomically adds 0x00010000 to lock->slock and leaves the previous
 * value in inc. The low 16 bits of that previous value go to tmp and the
 * high 16 bits stay in inc. The loop then compares the two: when equal it
 * exits; otherwise it executes "rep ; nop", reloads the low 16 bits of
 * lock->slock into tmp, and tries again.
 *
 * LOCK_PREFIX and raw_spinlock_t are defined outside this snippet.
 */
static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
"movzwl %w0, %2\n\t"         /* tmp = low 16 bits of the old lock value */
"shrl $16, %0\n\t"           /* inc = high 16 bits of the old lock value */
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"                  /* halves match: lock acquired */
"rep ; nop\n\t"
"movzwl %1, %2\n\t"          /* re-read the low 16 bits of lock->slock */
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
:
: "memory", "cc");
}
My question is:
How can I add a time counter to monitor how long the lock spins? Please give me some advice.
You can use the rdtsc time-stamp counter to measure the interval; see the links below: http://www.xml.com/ldd/chapter/book/ch06.html
http://wiki.osdev.org/Inline_Assembly/Examples

Resources