how to use rdtscp correctly? - benchmarking

According to the Intel white paper "How to Benchmark Code Execution Times on Intel® IA-32 and IA-64 Instruction Set
Architectures", I use the code below:
/*
 * Read the time-stamp counter at the start of a measured region.
 *
 * CPUID is issued first as a serializing instruction, then RDTSCP reads
 * the counter into EDX:EAX (it also writes ECX). The two halves are
 * copied into compiler-chosen registers and combined.
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t bench_start(void)
{
    unsigned cycles_low, cycles_high;

    /*
     * __asm__/__volatile__ are used instead of asm/volatile because the
     * plain asm keyword is a GNU extension that is not recognised under
     * -std=c99 / -std=c11.
     *
     * The "memory" clobber makes the statement a compiler barrier, so
     * loads and stores of the code being measured are not moved across
     * the timestamp by the optimiser.
     */
    __asm__ __volatile__("CPUID\n\t"
                         "RDTSCP\n\t"
                         "mov %%edx, %0\n\t"
                         "mov %%eax, %1\n\t"
                         : "=r" (cycles_high), "=r" (cycles_low)
                         :
                         : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return (uint64_t) cycles_high << 32 | cycles_low;
}
/*
 * Read the time-stamp counter at the end of a measured region.
 *
 * RDTSCP reads the counter into EDX:EAX (it also writes ECX). Both halves
 * are copied out before CPUID runs, because CPUID overwrites
 * EAX/EBX/ECX/EDX. The trailing CPUID is a serializing instruction that
 * keeps later instructions from starting before the read.
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t bench_end(void)
{
    unsigned cycles_low, cycles_high;

    /*
     * __asm__/__volatile__ are used instead of asm/volatile because the
     * plain asm keyword is a GNU extension that is not recognised under
     * -std=c99 / -std=c11.
     *
     * The "memory" clobber makes the statement a compiler barrier, so
     * loads and stores of the code being measured are not moved across
     * the timestamp by the optimiser.
     */
    __asm__ __volatile__("RDTSCP\n\t"
                         "mov %%edx, %0\n\t"
                         "mov %%eax, %1\n\t"
                         "CPUID\n\t"
                         : "=r" (cycles_high), "=r" (cycles_low)
                         :
                         : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    return (uint64_t) cycles_high << 32 | cycles_low;
}
However, I have also seen people use the code below:
/*
 * Read the time-stamp counter with a bare RDTSCP (no CPUID fence).
 *
 * RDTSCP places the counter in EDX:EAX, which are bound directly to the
 * two output variables, and additionally writes ECX.
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t bench_start(void)
{
    unsigned cycles_low, cycles_high;

    /*
     * "asm_volatile" in the original is not a keyword; the statement is
     * spelled __asm__ __volatile__ so it also builds under -std=c99/c11.
     * ECX is listed as clobbered because RDTSCP overwrites it, and
     * "memory" keeps the compiler from moving memory accesses of the
     * measured code across the read.
     */
    __asm__ __volatile__("RDTSCP\n\t"
                         : "=d" (cycles_high), "=a" (cycles_low)
                         :
                         : "%ecx", "memory");
    return (uint64_t) cycles_high << 32 | cycles_low;
}
/*
 * Read the time-stamp counter with a bare RDTSCP (no CPUID fence).
 *
 * NOTE(review): this definition reuses the name bench_start; it was
 * presumably meant to be the matching bench_end -- confirm.
 *
 * RDTSCP places the counter in EDX:EAX, which are bound directly to the
 * two output variables, and additionally writes ECX.
 *
 * Returns the 64-bit counter value.
 */
static inline uint64_t bench_start(void)
{
    unsigned cycles_low, cycles_high;

    /*
     * "asm_volatile" in the original is not a keyword; the statement is
     * spelled __asm__ __volatile__ so it also builds under -std=c99/c11.
     * ECX is listed as clobbered because RDTSCP overwrites it, and
     * "memory" keeps the compiler from moving memory accesses of the
     * measured code across the read.
     */
    __asm__ __volatile__("RDTSCP\n\t"
                         : "=d" (cycles_high), "=a" (cycles_low)
                         :
                         : "%ecx", "memory");
    return (uint64_t) cycles_high << 32 | cycles_low;
}
As you know, RDTSCP is only pseudo-serializing, so why do some people use the second version? I can think of two possible reasons:
Maybe in most situations RDTSCP alone is enough to ensure "in-order execution"?
Maybe they just want to avoid CPUID for efficiency?

Related

Implementing atomic CAS in gcc inline

I've been trying to implement a gcc inline function (AT&T assembly) that will perform an atomic CAS operation, but I can't get it to work - the return value is always getting messed up.
I've tried 2 different approaches, each seems to have its own misbehaviours:
1.
/*
 * Atomic compare-and-swap on the 32-bit integer at addr (x86).
 *
 * If *addr equals expected, newval is stored there atomically.
 *
 * addr     - location of the int to update
 * expected - value *addr must currently hold for the swap to happen
 * newval   - value to store on success
 *
 * Returns 1 if the swap happened, 0 otherwise.
 */
static inline int
cas(volatile void * addr, int expected, int newval)
{
    volatile int *target = addr;
    unsigned char swapped;

    /*
     * cmpxchg compares EAX with the memory operand and sets ZF on a
     * match; on a mismatch it loads the memory value into EAX. EAX is
     * therefore an in/out operand ("+a"), and the memory word is "+m".
     * sete copies ZF into a byte register, replacing the original
     * pushfl/popl sequence, which overwrote the EBX input and does not
     * assemble in 64-bit mode.
     */
    __asm__ __volatile__("lock; cmpxchgl %3, %1\n\t"
                         "sete %0"
                         : "=q"(swapped), "+m"(*target), "+a"(expected)
                         : "r"(newval)
                         : "memory", "cc");
    return swapped;
}
2.
/*
 * Atomic compare-and-swap on the 32-bit integer at addr (x86).
 *
 * If *addr equals expected, newval is stored there atomically.
 *
 * Returns 1 if the swap happened, 0 otherwise.
 */
static inline int cas(volatile void * addr, int expected, int newval) {
    int ret = 1;

    /*
     * ret is an in/out register operand ("+r"): with a write-only "=m"
     * output the compiler may drop the "= 1" initialiser, leaving the
     * success path with an undefined value.
     * cmpxchg loads the memory value into EAX on a mismatch, so
     * expected is declared in/out ("+a"); the target word is "+m" and
     * the flags are clobbered ("cc").
     */
    __asm__ __volatile__("lock; cmpxchgl %3, %1\n\t"
                         "jz cas_success%=\n\t"   /* ZF set: swap done, keep ret == 1 */
                         "movl $0, %0\n\t"
                         "cas_success%=:\n\t"
                         : "+r"(ret), "+m"(*(volatile int *)addr), "+a"(expected)
                         : "r"(newval)
                         : "memory", "cc");
    return ret;
}
But neither works. Could anyone point out the problem with either of the implementations?
Thanks

64bit dividend on 32bit architecture, works in assembly but not in C

I have a 64bit dividend and a 32bit divisor.
GCC does not seem to be able to generate this kind of assembly. It complains about an undefined reference to '__udivdi3'; I know this is because I use the -nostdlib flag. However, I cannot use any standard libraries.
The 64bit variables are of type unsigned long long.
Is there a more elegant way to do this than the inline assembly below?
My goal is: my64bit / 32bitDivisor.
volatile uint32_t high = my64bit >> 32;
volatile uint32_t low = my64bit;
volatile uint32_t out;
__asm__ __volatile__ (
"movl %0, %%edx\n\t"
"movl %1, %%eax\n\t"
"div %2\n\t"
"movl %%eax, (%3)\n\t"
:: "r" (high), "r" (low), "r" (32bitDivisor) "r" (&out)
: "%eax", "%edx"
);

Compile GCC Inline Assembly into Microsoft Visual C++ 2008

I'm having trouble compiling this GCC inline assembly to Microsoft Visual C++ 2008 assembly
GCC inline assembly:
/*
 * ARM inline assembly in GCC extended-asm syntax.
 * Operands: %0 = lo, %1 = hi (outputs), %2 = rb, %3 = ra (inputs).
 */
__asm__(
/* lo = low 32 bits, hi = high 32 bits of the signed product rb * ra */
"smull %0, %1, %2, %3 \n\t"
/* lo = lo >> 16 (logical shift right) */
"mov %0, %0, LSR #16 \n\t"
/* hi = lo + (hi << 16) */
"add %1, %0, %1, LSL #16 \n\t"
/* "=&r": early-clobber outputs, written before all inputs are consumed */
: "=&r"(lo), "=&r"(hi)
: "r"(rb), "r"(ra));
The compiler says:
error C2143: syntax error : missing ')' before ':'
The complete function is:
/*
 * Multiply a by b with the ARM smull instruction and return the 64-bit
 * product shifted right by 16 bits, truncated to Word32.
 */
static __inline Word32 mull(Word32 a, Word16 b)
{
    Word32 wide_b = b;          /* widen b to Word32 for the register operand */
    Word32 prod_lo, prod_hi;

    __asm__(
        /* prod_lo / prod_hi = low / high word of wide_b * a */
        "smull %0, %1, %2, %3 \n\t"
        /* prod_lo >>= 16 (logical) */
        "mov %0, %0, LSR #16 \n\t"
        /* prod_hi = prod_lo + (prod_hi << 16) */
        "add %1, %0, %1, LSL #16 \n\t"
        : "=&r"(prod_lo), "=&r"(prod_hi)
        : "r"(wide_b), "r"(a));

    return prod_hi;
}
Thanks.
Visual Studio does not support ARM inline assembly. See: Inline assembly is not supported on the ARM. You will need to either reverse-engineer the assembly code to C, or use a separate assembler and link this as a separate function.
It looks like this function just does a 32 x 32 -> 64 bit signed multiply and then shifts the 64 bit result right by 16 bits and truncates it to 32 bits:
/*
 * Portable C version: multiply a by b in Word64, shift the product right
 * by 16 bits and truncate the result to Word32.
 */
static __inline Word32 mull(Word32 a, Word16 b)
{
    Word64 product = (Word64)a * (Word64)b;

    return (Word32)(product >> 16);
}

Calling printf from inline ASM (X64)

I have this code:
#include <stdio.h>
#include <stdint.h>
/*
 * Print "Hello world!" by calling printf from inline assembly, following
 * the calling convention of the target (i386 cdecl or x86-64 System V).
 * On any other target printf is called directly from C.
 *
 * Returns printf's return value.
 */
int main(void){
    const char *fmt = "%s";
    const char *s = "Hello world!\n";
    int ret = 0;

#if defined(__i386__)
    //gcc -m32 test.c
    /*
     * cdecl: arguments are pushed right to left, the result comes back in
     * EAX and the caller removes the arguments. EAX/ECX/EDX are not
     * preserved by the callee.
     * NOTE(review): the stack is not re-aligned before the call; confirm
     * the alignment requirement of the target ABI.
     */
    __asm__ __volatile__ (
        "pushl %[str]\n\t"
        "pushl %[fmt]\n\t"
        "call printf\n\t"
        "addl $8, %%esp"
        : "=a" (ret)
        : [str] "r" (s), [fmt] "r" (fmt)
        : "ecx", "edx", "memory", "cc"
    );
#elif defined(__x86_64__)
    //gcc -m64 test.c
    /*
     * System V AMD64: the first two integer arguments go in RDI and RSI,
     * and for a variadic call AL holds the number of vector registers
     * used (0 here). The result comes back in EAX.
     *
     * RSP is saved in RBX (preserved by the callee), moved below the
     * 128-byte red zone and aligned to 16 bytes before the call, then
     * restored. RDI/RSI are in/out operands because the callee may
     * overwrite them, and the remaining caller-saved integer registers
     * are listed as clobbers. Vector registers are not listed; this
     * function keeps no floating-point state across the call.
     */
    __asm__ __volatile__ (
        "movq %%rsp, %%rbx\n\t"
        "subq $128, %%rsp\n\t"
        "andq $-16, %%rsp\n\t"
        "xorl %%eax, %%eax\n\t"
        "call printf\n\t"
        "movq %%rbx, %%rsp"
        : "=a" (ret), "+D" (fmt), "+S" (s)
        :
        : "rbx", "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc"
    );
#else
    /* No inline-assembly path for this target. */
    (void) fmt;
    ret = printf("%s", s);
#endif
    return ret;
}
The x86 version works as expected, but the x64 version segfaults. Why is it segfault-ing?
The 64-bit ABI uses registers (RDI, RSI, RDX, RCX, R8 and R9) instead of the stack for argument passing. So the code should be:
movq %2,%%rdi        /* 1st argument: format string (64-bit pointer, so movq) */
movq %1,%%rsi        /* 2nd argument: string to print */
xorl %%eax,%%eax     /* variadic call: AL = number of vector registers used (0) */
call printf
movq %%rax,%0        /* AT&T order is source, destination: store the result */
I think this is related to the 64-bit ABI. You can find more information in that SO question.

How to add a counter in gcc asm?

In the Linux kernel code, when a spinlock is already locked, the spin_lock function spins. The code of spin_lock is below:
/*
 * Take a ticket from lock->slock and spin until it is this caller's turn.
 *
 * The upper 16 bits of slock are advanced by one on every call; the value
 * they held beforehand is this caller's ticket. The function returns once
 * the lower 16 bits of slock equal that ticket.
 */
static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
{
/* Added to slock: bumps its upper 16 bits by one. */
int inc = 0x00010000;
/* Holds the most recently read lower 16 bits of slock. */
int tmp;
/*
 * Operands: %0 = inc, %1 = lock->slock, %2 = tmp.
 *
 *   xaddl   adds inc to slock and leaves slock's previous value in inc
 *           (LOCK_PREFIX is defined elsewhere -- presumably the x86 lock
 *           prefix that makes this atomic; confirm).
 *   movzwl  tmp = lower 16 bits of the previous value.
 *   shrl    inc = upper 16 bits of the previous value (the ticket).
 *   1:      leave the loop (label 2) when tmp equals the ticket;
 *           otherwise execute "rep ; nop" (the encoding of PAUSE),
 *           reload the lower 16 bits of slock into tmp and retry.
 */
asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
:
: "memory", "cc");
}
My question is:
How can I add a time counter to monitor how long the lock spins? Please give me some advice.
You can use the rdtsc time-stamp counter to measure the interval; see the links below: http://www.xml.com/ldd/chapter/book/ch06.html
http://wiki.osdev.org/Inline_Assembly/Examples

Resources