Read Cortex A15 Performance Counter from User Space - arm

I am trying to read the performance counters (cycle and event count registers) of my ARM big.LITTLE system, which consists of 4 Cortex-A7 and 4 Cortex-A15 cores. I have no problems reading the values of the performance counters if I run my test task on an A7 core, but if I run the same task on a Cortex-A15 core I get an "illegal instruction" error. The code I use for enabling the counters is below.
I think it's because my kernel module only enables user-space access to the performance counters of the A7 cores, but I can't figure out how to enable user-space access to the counters of the A15 cores.
Does someone have an idea how I could do it?
I appreciate any help.
#define PERF_DEF_OPTS (1 | 16)
#define DRVR_NAME "enable_arm_pmu"
/*
 * Per-CPU callback (dispatched with on_each_cpu() from hello_init) that writes
 * three CP15 c9 registers on the CPU it runs on.
 *
 * data: unused.
 *
 * NOTE(review): the register names in the comments below follow the ARMv7 PMU
 * encoding (PMUSERENR, PMCR, PMCNTENSET) — confirm against the ARM ARM for the
 * target core.
 */
static void enable_cpu_counter(void* data){
/*Enable counters to user land*/
/* c9, c14, 0 <- 1 (presumably PMUSERENR.EN: allow user-mode access) */
__asm__("MCR p15, 0, %0, c9, c14, 0" :: "r"(1));
/* c9, c12, 0 <- PERF_DEF_OPTS (presumably PMCR) */
__asm__("MCR p15, 0, %0, c9, c12, 0" :: "r"(PERF_DEF_OPTS));
/* c9, c12, 1 <- bit 31 plus bits 3..0 (presumably PMCNTENSET: cycle counter and event counters 0-3) */
__asm__ ("MCR p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
}
/*
 * Per-CPU callback (dispatched with on_each_cpu() from hello_exit) that undoes
 * enable_cpu_counter() on the CPU it runs on.
 *
 * The previous body re-wrote PERF_DEF_OPTS to c9, c12, 0 and wrote the counter
 * mask to the enable-SET register (c9, c12, 1), which left the counters running
 * after the module was unloaded. This version clears the control register and
 * writes the mask to the enable-CLEAR register (c9, c12, 2) instead.
 *
 * data: unused.
 *
 * Register names follow the ARMv7 PMU encoding (PMUSERENR, PMCR, PMCNTENCLR).
 */
static void disable_cpu_counter(void* data){
	/* PMUSERENR <- 0: revoke user-mode access to the counters. */
	__asm__("MCR p15, 0, %0, c9, c14, 0" :: "r"(0));
	/* PMCR <- 0: clear the global enable (E) bit. */
	__asm__("MCR p15, 0, %0, c9, c12, 0" :: "r"(0));
	/* PMCNTENCLR <- cycle counter (bit 31) and event counters 0-3. */
	__asm__("MCR p15, 0, %0, c9, c12, 2" :: "r"(0x8000000f));
}
/*
 * Module load handler: runs enable_cpu_counter() on every online CPU and logs
 * the result.
 *
 * Returns 0 unconditionally.
 */
static int hello_init(void)
{
	printk(KERN_ALERT "Performance counter enable\n");
	/* Last argument is on_each_cpu()'s wait flag (non-zero here). */
	on_each_cpu(enable_cpu_counter, NULL, 1);
	/* printk records should be newline-terminated; the original string was not. */
	printk(KERN_INFO "[" DRVR_NAME "] initialised\n");
	return 0;
}
/*
 * Module unload handler: runs disable_cpu_counter() on every online CPU and
 * logs the result.
 */
static void hello_exit(void)
{
	printk(KERN_ALERT "Performance counter disabled\n");
	/* Last argument is on_each_cpu()'s wait flag (non-zero here). */
	on_each_cpu(disable_cpu_counter, NULL, 1);
	/* printk records should be newline-terminated; the original string was not. */
	printk(KERN_INFO "[" DRVR_NAME "] unloaded\n");
}
/* Register hello_init / hello_exit as the module's load and unload handlers. */
module_init(hello_init);
module_exit(hello_exit);
`

Related

Problems enabling MMU on ARM Cortex-A8. CPU is S5PV210

I have been trying to write some bare-metal code that enables the MMU, but after days of trying I still can't get it working. Since I can't debug it over a serial console and I don't have an expensive debugger like D-STREAM, all I can do is paste the code here and ask for help.
I don't want to be a leech, but I really don't know what further information I can provide.
My CPU is an S5PV210, which is based on the Cortex-A8 architecture.
What I want to do is set up a flat memory mapping,
which simply means "Virtual Address == Physical Address".
The code located after
"ldr pc, =0x30000000\n"
is just an LED-flashing program. If I comment out this line in "enable_mmu"
"orr r0, r0, #0x0001\n" /* .... .... .... ...1 Enable MMU */
my LED-flashing program works well; if I uncomment it, the LED stops flashing.
Here is the whole program
/* Address of the translation table filled by init_mmu(); enable_mmu() loads the same value (as the literal 0x2F000000) into c2, c0, 0. */
#define ttl_base 0x2F000000
/*
 * Fields OR-ed together into every table entry written by init_mmu().
 * NOTE(review): the bit positions match the ARMv7 short-descriptor "section"
 * format (AP[11:10], domain[8:5], XN[4], C[3], B[2], type[1:0]) — confirm
 * against the ARM ARM.
 */
#define MMU_DES_FULL_ACESS (3<<10)
#define MMU_DES_DOMAIN (0<<5) /* evaluates to 0 */
#define MMU_DES_EXECUTE_NEVER (0<<4) /* evaluates to 0, i.e. bit 4 is left clear */
#define MMU_DES_CACHEABLE (1<<3)
#define MMU_DES_BUFFERABLE (1<<2)
#define MMU_DES_SECTION (2)
/* Combined attribute bits applied to every entry. */
#define MMU_DES_ATTRIBUTE (MMU_DES_SECTION|MMU_DES_BUFFERABLE|MMU_DES_CACHEABLE|MMU_DES_EXECUTE_NEVER|MMU_DES_DOMAIN|MMU_DES_FULL_ACESS)
void init_mmu()
{
//Create Translation Table for a flat map (Vitual Address == Physical Address)
u32 virtualaddr,phyaddr;
u32 *mmu_tlb_base=(u32 *)ttl_base;
virtualaddr = 0x0;
phyaddr = 0x0;
while(1)
{
*(mmu_tlb_base + (virtualaddr>>20)) = (phyaddr & 0xFFF00000) | (MMU_DES_ATTRIBUTE); //map 0x0x30000000-0x30100000 to 0xB0000000-0xB0100000
virtualaddr+=0x100000;
phyaddr+=0x100000;
if (phyaddr==0x00000000)
{
break;
}
}
}
/*
 * Invalidate caches/TLB, program the translation-table registers, turn on the
 * MMU and caches through the c1, c0, 0 control register, then load sp and jump
 * to 0x30000000.
 *
 * NOTE(review): this is a basic asm statement with no operand or clobber list,
 * although it modifies r0-r4, sp and pc; the labels (way_loop, set_loop, loop)
 * are plain global assembler labels.
 * NOTE(review): register names in the comments (SCTLR, CCSIDR, TTBCR, TTBR0,
 * DACR) follow the ARMv7-A CP15 encoding — confirm against the ARM ARM.
 */
void enable_mmu()
{
__asm__(
"mrc p15, 0, r0, c1, c0, 0\n"
"bic r0, r0, #0x3000\n" /* clear bits 13 and 12 */
"mcr p15, 0, r0, c1, c0, 0\n" /* Disable instruction cache */
"mov r0, #0\n"
"mcr p15, 0, r0, c7, c5, 0\n" /* Invalidate entire instruction cache */
"mcr p15, 0, r0, c7, c5, 6\n" /* Invalidate entire branch predictor */
"mcr p15, 0, r0, c8, c7, 0\n" /* Invalidate data and instruction TLB */
/* Invalidate entire data cache by set/way */
/* Start */
"MRC p15, 1, r0, c0, c0, 0\n" /* Read Cache Size ID */
"LDR r3, =0x1ff\n"
"AND r0, r3, r0, LSR #13\n" /* r0 = no. of sets - 1 (masked to 9 bits) */
"MOV r1, #0\n" /* r1 = way counter for way_loop */
"way_loop:\n"
"MOV r3, #0\n" /* r3 = set counter for set_loop */
"set_loop:\n"
"MOV r2, r1, LSL #30\n" /* way index in bits [31:30] */
"ORR r2, r3, LSL #5\n" /* r2 = set/way cache operation format (set index shifted by 5) */
"MCR p15, 0, r2, c7, c6, 2\n" /* Invalidate line described by r2 */
"ADD r3, r3, #1\n" /* Increment set counter */
"CMP r0, r3\n" /* Last set reached yet? */
/* NOTE(review): r0 holds sets-1 and BGT exits once r3 == r0, so the set with index r0 is never invalidated — confirm whether BGE was intended */
"BGT set_loop\n" /* if not, iterate set_loop */
"ADD r1, r1, #1\n" /* else, next way */
"CMP r1, #4\n" /* Last way reached yet? (way count hard-coded to 4) */
"BNE way_loop\n" /* if not, iterate way_loop */
/* End */
/* NOTE(review): the LSL #5 set shift and the constant 4 ways are hard-coded rather than derived from the Cache Size ID value — confirm they match the target cache */
/* Data and instruction barrier */
"dsb\n"
"isb\n"
"mov r0, #0\n"
"mcr p15, 0, r0, c2, c0, 2\n" /* Write 0 to c2, c0, 2 (TTBCR in the ARMv7 encoding — confirm) */
"mov r4, #0x2F000000\n" /* same value as ttl_base */
"mcr p15, 0, r4, c2, c0, 0\n" /* Write translation table base to c2, c0, 0 (TTBR0 in the ARMv7 encoding — confirm) */
"mvn r0, #0\n"
"mcr p15, 0, r0, c3, c0, 0\n" /* Write 0xFFFFFFFF to Domain Access Register, which means no permission check */
"mrc p15, 0, r0, c1, c0, 0\n" /* Read SCTLR */
/* .RVI ..RS B... .CAM */
"bic r0, r0, #0x3000\n" /* ..11 .... .... .... Clear bit V, bit I */
"bic r0, r0, #0x0087\n" /* .... .... 1... .111 Clear bit B/C/A/M */
"orr r0, r0, #0x0002\n" /* .... .... .... ..1. Enable alignment check */
"orr r0, r0, #0x0004\n" /* .... .... .... .1.. Enable data caches */
"orr r0, r0, #0x1000\n" /* ...1 .... .... .... Enable instruction caches */
"orr r0, r0, #0x0800\n" /* .... 1... .... .... Enable branch prediction */
"orr r0, r0, #0x0001\n" /* .... .... .... ...1 Enable MMU */
"mcr p15, 0, r0, c1, c0, 0\n" /* Write back to SCTLR */
"ldr sp, =0x3F000000\n" /* new stack pointer */
"ldr pc, =0x30000000\n" /* jump to the code at 0x30000000 */
"loop:\n" /* only reached if execution falls through the jump above */
"b loop\n"
);
}
It seems that your page table init code is right. But you need to provide the MMU_DES_ATTRIBUTE. I think there is something wrong with the attributes.

avr inline assembler error: impossible constraint

#include <avr/io.h>
/*
 * Writes 0xff to DDRB and PORTB once, then loops forever issuing cbi, eleven
 * nops and sbi on PORTB.
 *
 * NOTE(review): r16 is modified by the first asm statement but is not listed
 * as a clobber.
 */
int main(void){
/* %0 = 0xff, %1 = I/O address of DDRB, %2 = I/O address of PORTB */
asm volatile("ldi r16, %0\n\t"
"out %1, r16\n\t"
"ldi r16, %0\n\t"
"out %2, r16\n\t"::"M" (0xff),"I" (_SFR_IO_ADDR(DDRB)),"I" (_SFR_IO_ADDR(PORTB)));
while(1) {
/* %0 = operand built from PORTB, %1 = 0xff used as the second cbi/sbi operand */
asm volatile("cbi %0, %1\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
"nop\n\t"
/* NOTE(review): SFR_IO_ADDR below lacks the leading underscore used in the first asm statement (_SFR_IO_ADDR) */
"sbi %0, %1\n\t"::"I" (SFR_IO_ADDR(PORTB)), "M" (0xff));
}
}
At compilation: error: impossible constraint
the pointer is set to the asm statement position: 11-9 (asm volatile("cbi %0, %1\n\t")
But why?
You are missing the leading underscore on SFR_IO_ADDR(PORTB) so it gets compiled as a call to an external function returning an integer. If you had enabled warnings you would have seen this: warning: implicit declaration of function 'SFR_IO_ADDR'

Compile GCC Inline Assembly into Microsoft Visual C++ 2008

I'm having trouble compiling this GCC inline assembly with Microsoft Visual C++ 2008.
GCC inline assembly:
/*
 * smull places the 64-bit signed product of %2 and %3 in %0 (low word) and
 * %1 (high word); the next two instructions leave (%0 >> 16) + (%1 << 16) in %1.
 * Outputs: %0 = lo, %1 = hi (both early-clobber). Inputs: %2 = rb, %3 = ra.
 */
__asm__(
"smull %0, %1, %2, %3 \n\t"
"mov %0, %0, LSR #16 \n\t"
"add %1, %0, %1, LSL #16 \n\t"
: "=&r"(lo), "=&r"(hi)
: "r"(rb), "r"(ra));
The compiler says:
error C2143: syntax error : missing ')' before ':'
The complete function is:
/*
 * Multiply a by b with ARM inline assembly and return bits [47:16] of the
 * 64-bit signed product.
 *
 * a: first factor.
 * b: second factor (copied into a Word32 before the multiply).
 * Returns the value left in hi by the asm sequence.
 */
static __inline Word32 mull(Word32 a, Word16 b)
{
register Word32 ra = a;
register Word32 rb = b;
Word32 lo, hi;
__asm__(
"smull %0, %1, %2, %3 \n\t" /* lo:hi = rb * ra (64-bit signed product) */
"mov %0, %0, LSR #16 \n\t" /* lo = lo >> 16 (logical shift) */
"add %1, %0, %1, LSL #16 \n\t" /* hi = lo + (hi << 16) */
: "=&r"(lo), "=&r"(hi)
: "r"(rb), "r"(ra));
return hi;
}
Thanks.
Visual Studio does not support ARM inline assembly. See: Inline assembly is not supported on the ARM. You will need to either reverse-engineer the assembly code to C, or use a separate assembler and link this as a separate function.
It looks like this function just does a 32 x 32 -> 64 bit signed multiply and then shifts the 64 bit result right by 16 bits and truncates it to 32 bits:
/*
 * Portable C form of mull(): widen both factors to Word64, multiply, shift the
 * product right by 16 bits and truncate the result to Word32.
 *
 * a: first factor.
 * b: second factor.
 */
static __inline Word32 mull(Word32 a, Word16 b)
{
	Word64 wide_a = (Word64)a;
	Word64 wide_b = (Word64)b;
	Word64 product = wide_a * wide_b;

	return (Word32)(product >> 16);
}

How to add a counter in gcc asm?

In the Linux kernel code, when a spinlock is already locked, the spin_lock function spins. The code of spin_lock is below:
/*
 * Acquire the lock: atomically add 0x00010000 to lock->slock, then spin until
 * the low 16 bits of lock->slock equal the upper 16 bits of the value that was
 * fetched by the add.
 *
 * lock: lock whose slock field is updated and polled.
 *
 * NOTE(review): presumably the upper half is this caller's ticket and the lower
 * half is the ticket currently being served — confirm against the unlock path.
 * LOCK_PREFIX is defined outside this snippet.
 */
static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
{
int inc = 0x00010000; /* +1 in the upper 16 bits */
int tmp;
/* xaddl: lock->slock += inc, and inc receives the previous lock->slock value */
asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
"movzwl %w0, %2\n\t" /* tmp = low 16 bits of the previous value */
"shrl $16, %0\n\t" /* inc = upper 16 bits of the previous value */
"1:\t"
"cmpl %0, %2\n\t" /* compare inc with tmp */
"je 2f\n\t" /* equal: leave the loop */
"rep ; nop\n\t" /* spin-wait hint (encodes PAUSE) */
"movzwl %1, %2\n\t" /* reload tmp from the low 16 bits of lock->slock */
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
:
: "memory", "cc");
}
My question is:
How can I add a time counter to monitor how long the lock spins? Please give me some advice.
You can use the rdtsc time stamp counter to measure the interval; see the links below: http://www.xml.com/ldd/chapter/book/ch06.html
http://wiki.osdev.org/Inline_Assembly/Examples

Profiling on ARM Cortex-A8

I want to profile my application on an ARM processor. I found that oprofile doesn't work. Someone used the following code to test this a few years ago: the cycle counter
does work, but the performance monitor counter still doesn't. I tested it again and the result is the same. With the following code I get cycle count: 2109, performance monitor count: 0. I have searched on Google but so far have not found a solution. Has anyone fixed this issue?
uint32_t value = 0
uint32_t count = 0;
struct timeval tv;
struct timezone tz;
// enable all counters
__asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 1" ::"r" (0x8000000f));
// select counter 0,
__asm__ __volatile__("mcr p15, 0, %0, c9, c12, 5" ::"r" (0x0));
// select event
__asm__ __volatile__ ("mcr p15, 0, %0, c9, c13, 1" ::"r"(0x57));
// reset all counters to ero and enable all counters
__asm__ __volatile__ ("mrc p15, 0, %0, c9, c12, 0" : "=r" (value));
value |= 0xF;
__asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 0" :: "r" (value));
gettimeofday(&tv, &tz);
__asm__ __volatile__("mrc p15, 0, %0, c9, c13, 0" : "=r" (count));
printf("cycle count: %d", count);
__asm__ __volatile__ ("mrc P15, 0, %0, c9, c13, 2": "=r" (count));
printf("performance monitor count: %d", count);
I just ran into the same issue, and in my case it was due to the NIDENm signal being pulled low.
From the ARM documentation:
The PMU only counts events when non-invasive debug is enabled, that is, when either DBGENm or NIDENm inputs are asserted. The Cycle Count (PMCCNTR) Register is always enabled regardless of whether non-invasive debug is enabled, unless the DP bit of the PMCR register is set.
That NIDENm signal is an input to the ARM core, so exactly how it is controlled will depend on the parts of the processor external to the core. In my case, I found a register controlling NIDEN. In your case, it may be a register, or a pin, or (possibly) the signal is just pulled low and you can't use the feature.
Also from the ARM documentation:
The values of the DBGENm and NIDENm signals can be determined by polling DBGDSCR[17:16], DBGDSCR[15:14], or the DBGAUTHSTATUS.
So, if you can read one of those, you can confirm that the problem is NIDENm.

Resources