what is the right way to update MMU translation table - arm

I enabled MMU on my s3c2440 board (3G - 4G memory :: the fault attribute),everything was just fine when I didn't read/write 3G - 4G memory .So to test the page fault vector ,I wrote to a 0xFF to the 3G address,as I expected ,I got the right value from FSR ,So I did this in _do_page_fault (), the step was like this :
..... // set new page to translation table
.....
invlidate_icache (); // clear icache
clr_dcache (); // wb is used ,clear dcache
invalidate_ttb (); // invalidate translation table
and then ISR_dataabort returned ,I read the 3G address to get the 0xFF which I wote before .
Unfortunately I got a data abort again .(I am sure the translation table value I set is ok)
So what is the correct way to update the MMU translation table .Any help is appreciate ! thanks
here is the Main code I used (just for some test),
(I am a littlte strange to ARM ARCH,so this code might be urgly )
/* MMU TTB 0 BASE ATTR */
#define TTB0_FAULT (0|(1<<4)) /* TTB FAULT */
#define TTB0_COARSE (1|(1<<4)) /* COARSE PAGE BASE ADDR */
#define TTB0_SEG (2|(1<<4)) /* SEG BASE ADDR */
#define TTB0_FINE (3|(1<<4)) /* FINE PAGE BASE ADDR */
/* MMU TTB 1 BASE ATTR */
#define TTB1_FAULT (0)
#define TTB1_LPG (1) /* Large page */
#define TTB1_SPG (2) /* small page */
#define TTB1_TPG (3) /* tiny page */
/* domain access priority level */
#define FAULT_PL (0x0) /* domain fault */
#define USR_PL (0x1) /* usr mode */
#define RSV_PL (0x2) /* reserved */
#define SYS_PL (0x3) /* sys mode */
#define DOMAIN_FAULT (0x0<<5) /* fault 0*/
#define DOMAIN_SYS (0x1<<5) /* sys 1*/
#define DOMAIN_USR (0x2<<5) /* usr 2*/
/* C,B bit */
#define CB (3<<2) /* cache_on, write_back */
#define CNB (2<<2) /* cache_on, write_through */
#define NCB (1<<2) /* cache_off,WR_BUF on */
#define NCNB (0<<2) /* cache_off,WR_BUF off */
/* ap 2 bits */
#define AP_FAULT (0<<10) /* access deny */
#define AP_SU_ONLY (1<<10) /* rw su only */
#define AP_USR_RO (2<<10) /* sup=RW, user=RO */
#define AP_RW (3<<10) /* su=RW, user=RW */
/* page dir 1 ap0 */
#define AP0_SU_ONLY (1<<4) /* rw su only */
#define AP0_USR_RO (2<<4) /* sup=RW, user=RO */
#define AP0_RW (3<<4) /* su=RW, user=RW */
/* page dir 1 ap1 */
#define AP1_SU_ONLY (1<<6) /* rw su only */
#define AP1_USR_RO (2<<6) /* sup=RW, user=RO */
#define AP1_RW (3<<6) /* su=RW, user=RW */
/* page dir 1 ap2 */
#define AP2_SU_ONLY (1<<8) /* rw su only */
#define AP2_USR_RO (2<<8) /* sup=RW, user=RO */
#define AP2_RW (3<<8) /* su=RW, user=RW */
/* page dir 1 ap3 */
#define AP3_SU_ONLY (1<<10) /* rw su only */
#define AP3_USR_RO (2<<10) /* sup=RW, user=RO */
#define AP3_RW (3<<10) /* su=RW, user=RW */
#define RAM_START (0x30000000)
#define KERNEL_ENTRY (0x30300000) /* BANK 6 (3M) */
#define KERNEL_STACK (0x3001A000) /* BANK 6 (16K + 64K + 16K + 8K) (8k kernel stack) */
#define IRQ_STACK (0x3001B000) /* 4K IRQ STACK */
#define KERNEL_IMG_SIZE (0x20000)
#define IRQ_STACK (0x3001B000)
/* 16K aignment */
#define TTB_BASE (0x30000000)
#define PAGE_DIR0 (TTB_BASE)
#define TTB_FULL_SIZE (0x4000)
#define PAGE_DIR1 (TTB_BASE+TTB_FULL_SIZE)
#define PAGE_DIR0_SIZE (0x4000) /* 16k */
void _do_page_fault (void)
{
//
...........
//
// read the FSR && get the vaddr && type here
volatile unsigned *page_dir = (volatile unsigned*)(TTB_BASE);
unsigned index = vaddr >> 20,i = 0, j = 0;
unsigned page = 0;
if (!(page_dir[index] & ~(0x3FF) && (type == 0x0B))) { /* page_dir empty */
i = index & ~0x03;
if ( (page_dir[i+0] & ~(0x3FF)) || (page_dir [i+1] & ~(0x3FF))
|| (page_dir[i+2] & ~(0x3FF)) || (page_dir [i+3] & ~(0x3FF)) )
{
panic ( "page dir is bad !\n" ); /* 4 continuous page_dir must be 0 */
}
if (!(page = find_free_page ()))
panic ( "no more free page !\n" ); /* alloc a page page dir*/
page_dir[i+0] = (page + 0x000) | DOMAIN_USR | TTB0_COARSE ; /* small page 1st 1KB */
page_dir[i+1] = (page + 0x400) | DOMAIN_USR | TTB0_COARSE ; /* small page 2nd 1KB */
page_dir[i+2] = (page + 0x800) | DOMAIN_USR | TTB0_COARSE ; /* small page 3rd 1KB */
page_dir[i+3] = (page + 0xC00) | DOMAIN_USR | TTB0_COARSE ; /* small page 4th 1KB */
if (!(page = find_free_page ()))
panic ( "no more free page !\n" ); /* alloc a page page table*/
volatile unsigned *page_tbl = (volatile unsigned*) (page_dir[index] & ~(0x3FF));
*page_tbl = page|AP0_RW|AP1_RW|AP2_RW|AP3_RW| NCNB|TTB1_SPG;/* small page is used */
invalidate_icache ();
for (i = 0; i < 64; i++)
{
for (j = 0;j < 8;j ++)
clr_invalidate_dcache ( (i<<26)|(j<<5) );
}
invalidate_tlb ();
}
........
//
}
/* here is the macros */
#define invalidate_tlb() \
{\
__asm__ __volatile__ (\
"mov r0,#0\n"\
"mcr p15,0,r0,c8,c7,0\n"\
:::"r0" \
);\
}
#define clr_invalidate_dcache(index) \
{\
__asm__ __volatile__ (\
"mcr p15,0,%[i],c7,c14,2\n"\
:: [i]"r"(index)\
);\
}
#define invalidate_icache() \
{\
__asm__ __volatile__ (\
"mov r0,#0\n"\
"mcr p15,0,r0,c7,c5,0\n"\
::: "r0"\
);\
}
#define invalidate_dcache() \
{\
__asm__ __volatile__ (\
"mov r0,#0\n"\
"mcr p15,0,r0,c7,c6,0\n"\
::: "r0"\
);\
}
#define invalidate_idcache() \
{\
__asm__ __volatile__ (\
"mov r0,#0\n"\
"mcr p15,0,r0,c7,c7,0\n"\
:::"r0"\
);\
}\

Note: I assume TTB_BASE is the primary ARM L1 page table. If it is some shadow, you need to show more code as per unixsmurf. Here is my best guess...
Your page_dir is functioning as both the primary L1 entries and as the L2 fine page table. The TTB_BASE should only contain sections, super sections or pointers to sub-page tables. You need to allocate more physical memory for the L2 page tables.
I guess your page_dir[i+0], page_dir[i+1], etc are overwriting other L1 section entries and Your invalidate_tlb() make this concrete to the CPU. You should be using the L2 page_tbl pointer to set/index the small/fine pages. Perhaps your kernel code is in the 3G-4G space and your are over-writing some critical L1 mappings there; any number of strange things could happen. The current use of page_tbl is unclear.
You can not double use the primary L1 tables as they have meaning to hardware. I guess that this is just a mistake?
There are only three types of entries in the primary L1 page tables,
Sections - a 1MB memory mapping straight virt to phys, with no 2nd page table.
Super-sections - a 4MB memory mapping as per sections, but minimizing TLB pressure.
A 2nd page table. Entries can be 1k,4k,and 64k. Fine page tables are 4k in table size and are not in modern ARM designs. Each entry is 1k of address in a fine page table.
The primary page table must be on a physical address that is 16k aligned, which is also it's size. 16k/(4bytes/entry)*1MB gives a 4GB address range of the L1 tables. So each entry in the primary L1 page table always refers to a 1MB entry. Coarse page tables are the only option on newer ARMs and they refer to a 1K L2 table.
1K_L2_size * 4K_entry / (4bytes_per_entry) gives 1MB address space.
1K_L2_size * 64K_entry / (16bytes_per_entry) gives 1MB address space.
There are four entries of 1MB each in the primary L1 for a super-section. There are four entries each for a 64k large page in the L2 table. If you are using super-sections, you do not have L2 entries.
I think you may have Super-sections and large pages mixed up? For certain having improperly formatted page tables will only manifest on a TLB invalidate, so that MMU mappings are re-fetched from the tables via a walk.
Finally, you should flush the D cache and drain the write buffer to ensure consistency with memory. You may wish to have a memory barrier as well.
static inline void dcache_clean(void)
{
const int zero = 0;
asm volatile ("" ::: "memory"); /* barrier */
/* clean entire D cache -> push to external memory. */
asm volatile ("1: mrc p15, 0, r15, c7, c10, 3\n"
" bne 1b\n" ::: "cc");
/* drain the write buffer */
asm volatile ("mcr 15, 0, %0, c7, c10, 4"::"r" (zero));
}
There are also co-processor commands to invalidate single TLB entries as you are only changing a portion of the vaddr/paddr mappings in the data fault.
See also: ARM MMU tutorial, Virtual Memory structures, and your ARM Architecture Reference Manual.

Related

Programmable Interrupt Controller: PIC1 and PIC2 are not returning zeros in real mode

Hyper-V output:
Code:
/*PIC Definition*/
#define PIC1 0x20 /* IO base address for master PIC */
#define PIC2 0xA0 /* IO base address for slave PIC */
#define PIC1_COMMAND PIC1
#define PIC1_DATA (PIC1+1)
#define PIC2_COMMAND PIC2
#define PIC2_DATA (PIC2+1)
#define ICW1_ICW4 0x01 /* ICW4 (not) needed */
#define ICW1_SINGLE 0x02 /* Single (cascade) mode */
#define ICW1_INTERVAL4 0x04 /* Call address interval 4 (8) */
#define ICW1_LEVEL 0x08 /* Level triggered (edge) mode */
#define ICW1_INIT 0x10 /* Initialization - required! */
#define ICW4_8086 0x01 /* 8086/88 (MCS-80/85) mode */
#define ICW4_AUTO 0x02 /* Auto (normal) EOI */
#define ICW4_BUF_SLAVE 0x08 /* Buffered mode/slave */
#define ICW4_BUF_MASTER 0x0C /* Buffered mode/master */
#define ICW4_SFNM 0x10 /* Special fully nested (not) */
#define inb(x,y) asm volatile ("inb %1, %0" : "=a"(x) : "d"(y));
void PIC_remap(BYTE offset1, BYTE offset2)
{
unsigned char a1, a2, cmd;
WORD portnum = PIC1_DATA;
inb(a1, portnum); // save masks
portnum = PIC2_DATA;
inb(a2, portnum);
WORD ret1 = a1, ret2 = a2;
printf("Response from PIC1 and PIC2: %d %d", ret1, ret2);
portnum = PIC1_COMMAND;
cmd = (ICW1_INIT | ICW1_ICW4);
outb(portnum, cmd); // starts the initialization sequence (in cascade mode)
io_wait();
portnum = PIC2_COMMAND;
outb(portnum, cmd);
io_wait();
portnum = PIC1_DATA;
outb(portnum, offset1); // ICW2: Master PIC vector offset
io_wait();
portnum = PIC2_DATA;
outb(portnum, offset2); // ICW2: Slave PIC vector offset
io_wait();
portnum = PIC1_DATA;
cmd = 4;
outb(portnum, cmd); // ICW3: tell Master PIC that there is a slave PIC at IRQ2 (0000 0100)
io_wait();
portnum = PIC2_DATA;
cmd = 2;
outb(portnum, cmd); // ICW3: tell Slave PIC its cascade identity (0000 0010)
io_wait();
portnum = PIC1_DATA;
cmd = ICW4_8086;
outb(portnum, cmd);
io_wait();
portnum = PIC2_DATA;
cmd = ICW4_8086;
outb(portnum, cmd);
io_wait();
outb(PIC1_DATA, a1); // restore saved masks.
outb(PIC2_DATA, a2);
}
I am doing some search on how the programmable interrupt controller behaves under the real mode. But I came up with some problems.
Expected behavior: PIC1 and PIC2 should return 0 0.
Reality: They return 184 (0xB8) and 15 (0xF). Can any one tell me why?
Reference: https://wiki.osdev.org/8259_PIC
Zack (The OP) provided an update with their implementation of inb and this answer has been modified to provide a more specific answer. inb has been defined as a macro this way:
#define inb(x,y) asm volatile ("inb %1, %0" : "=a"(x) : "d"(y));
It might be clearer to name macros with upper case identifiers like INB. It was unclear to me from the original code that this was a macro at all. An alternative to a macro would be to make inb a static inline function in a shared header file. You could even mark it with __attribute__((always_inline)) so that it gets inlined on lower optimization levels. The function could have been defined this way:
typedef unsigned short int WORD;
typedef unsigned char BYTE;
#define alwaysinline __attribute__((always_inline))
static inline alwaysinline BYTE inb (WORD portnum)
{
BYTE byteread;
asm volatile ("inb %1, %0"
: "=a"(byteread)
: "Nd"(portnum));
return byteread;
}
Values Returned by PIC DATA Port
When you read from the PIC DATA port you will be reading the current mask values that are used to determine which interrupts will cause the CPU to be interrupted. It effectively determines which specific interrupts are enabled and disabled on each PIC. A bit value of 0 represents enabled and 1 represents disabled.
If both values are 0 then all the interrupts on both PIC1 and PIC2 will be enabled. This may be the case in some environments, but it doesn't necessarily have to be the case. What you read may be non zero and indicative of what interrupts the BIOS enabled and disabled.
According to the screenshot PIC1 has the value 184 (binary 10111000) and PIC2 is 15 (binary 00001111). If these are in fact the values read from the PICs then it would suggest the BIOS/Firmware enabled these interrupts (and the rest disabled):
IRQ0 - Timer
IRQ1 - Keyboard
IRQ2 - Cascade interrupt
IRQ6 - Usually the Floppy controller
IRQ12 - Mouse/PS2
IRQ13 - Inter Processor interrupt (IPI) - in old days it was for the separate FPU
IRQ14 - Primary ATA Channel (HDD/CD-ROM etc)
IRQ15 - Secondary ATA Channel (HDD/CD-ROM etc)
These make sense given they are the common interrupts that you would expect would be present on a system that was supporting legacy BIOS and devices. Although your values are not zero, they do in fact look reasonable and are likely the real values read from both PICs

HAL drivers erase/read/write flash on STM32F4 nucleo

uint32_t PAGEError = 0;
FLASH_EraseInitTypeDef EraseInitStruct;
EraseInitStruct.TypeErase = FLASH_TYPEERASE_SECTORS ;
EraseInitStruct.Sector = FLASH_SECTOR_0;
EraseInitStruct.VoltageRange = FLASH_VOLTAGE_RANGE_3;
HAL_FLASH_Unlock();
__HAL_FLASH_CLEAR_FLAG(FLASH_FLAG_EOP | FLASH_FLAG_OPERR | FLASH_FLAG_WRPERR | FLASH_FLAG_PGAERR | FLASH_FLAG_PGPERR | FLASH_FLAG_PGSERR);
HAL_FLASHEx_Erase(&EraseInitStruct, &PAGEError);
HAL_FLASH_Program(FLASH_TYPEPROGRAM_WORD, 0x08000000, counter)
HAL_FLASH_Lock();
counter2 = *(__IO uint32_t *)0x08000000;
counter3 = *(__IO uint32_t *)0x08000001;
counter4 = *(__IO uint32_t *)0x08000002;
sprintf(buf, "%d", counter2); //gets send to the OLED with I2C
sprintf(buf2, "%d", counter3);
sprintf(buf3, "%d", counter4);
I want to write the variable counter to the flash and then read it as counter2.
The first flash sector starts at 0x08000000.
counter2, 3 and 4 are display trough an OLED screen.
Displaying counter2 works and shows me the value of counter-1, but it works only once. If I write again to the flash nothing seems to happen.
counter3 and counter4 don't work at all.
Output on the OLED when counter=0x00000008 after I have erased the flash but not written anything:
counter2: 536873624
counter3: -652214262
counter4: 31006720
And after writting and ressetting:
counter2: 8
counter3: -654311424
counter4: 30998528
What is going on here? Can someone tell me why all variables change?
Do I have to configure the linker?
I will treat you now as begineer, but will say sorry if you are not.
STM32 devices have flash on 0x08000000 and by erasing this sector, you did failure on startup because you erased actual part from where CPU loads instructions.
When you tried to erase sectors, you did not specify how many sectors to erase.
Reading of counters is wrong. Since you have uint32_t variable, you have to do 4-bytes between readings, something like:
counter2 = *(__IO uint32_t *)0x08000000;
counter3 = *(__IO uint32_t *)0x08000004;
counter4 = *(__IO uint32_t *)0x08000008;
Correct erasing is shown below.
EraseInitStruct.TypeErase = FLASH_TYPEERASE_SECTORS;
EraseInitStruct.VoltageRange = FLASH_VOLTAGE_RANGE_3;
EraseInitStruct.Sector = FLASH_SECTOR_0; //Specify sector number
EraseInitStruct.NbSectors = 1; //This is also important!
if(HAL_FLASHEx_Erase(&EraseInitStruct, &SectorError) != HAL_OK) {
//Erase error!
}
So, find out how long is your program and do your operations in sector after your program.
You can find example for EraseProgram in STM32CubeF4 package.
STM32Cube_FW_F4_V1.16.0\Projects\STM324x9I_EVAL\Examples\FLASH\FLASH_EraseProgram\Src\main.c
Concept will work also on your nucleo, just make sure you set correct address for flash erasing.
Thanks to #phoenix!
In Stm32CubF3 reference erasing flash works as follows:
There is to mention how a memory page address is looking:
/* Base address of the Flash sectors */
#define ADDR_FLASH_PAGE_0 ((uint32_t)0x08000000) /* Base # of Page 0, 2 Kbytes */
#define ADDR_FLASH_PAGE_1 ((uint32_t)0x08000800) /* Base # of Page 1, 2 Kbytes */
#define ADDR_FLASH_PAGE_2 ((uint32_t)0x08001000) /* Base # of Page 2, 2 Kbytes */
#define ADDR_FLASH_PAGE_3 ((uint32_t)0x08001800) /* Base # of Page 3, 2 Kbytes */
#define ADDR_FLASH_PAGE_4 ((uint32_t)0x08002000) /* Base # of Page 4, 2 Kbytes */
#define ADDR_FLASH_PAGE_5 ((uint32_t)0x08002800) /* Base # of Page 5, 2 Kbytes */
#define ADDR_FLASH_PAGE_6 ((uint32_t)0x08003000) /* Base # of Page 6, 2 Kbytes */
So your minimal code looks as follows:
I want to delete my application # 0x08003000 --> FLASH_PAGE_6. The erasing is done 2kB wise:
/* EEPROM start address in Flash */
#define EEPROM_START_ADDRESS ((uint32_t)ADDR_FLASH_PAGE_32) /* EEPROM emulation start address */
/* Pages 0 and 1 base and end addresses */
#define PAGE0_BASE_ADDRESS ((uint32_t)(EEPROM_START_ADDRESS + 0x0000))
#define PAGE0_END_ADDRESS ((uint32_t)(EEPROM_START_ADDRESS + (PAGE_SIZE - 1)))
//#define PAGE0_ID ADDR_FLASH_PAGE_32
/* Clear flash flags */
HAL_FLASH_Unlock();
__HAL_FLASH_CLEAR_FLAG(FLASH_FLAG_EOP |
FLASH_FLAG_WRPERR |
FLASH_FLAG_PGERR);
uint32_t *flash_ptr = 0x8003000;
uint32_t page_error = 0;
HAL_StatusTypeDef flashstatus;
FLASH_EraseInitTypeDef s_eraseinit;
/* Fill EraseInit structure*/
s_eraseinit.TypeErase = FLASH_TYPEERASE_PAGES;
s_eraseinit.NbPages = 1; // seems to be 1 !!!!
/* I want to delete 54kB in my flash --> 52kB / 2kB (per erase) = 26 iterations
After erasing one page, increment page by 0x800 = 2048 byte = 2kB = pagesize */
for(int pageCount = 0; pageCount < 26; pageCount++){
s_eraseinit.PageAddress = ADDR_FLASH_PAGE_6 + pageCount * 0x800;
flashstatus = HAL_FLASHEx_Erase(&s_eraseinit, &page_error);
}
The best way is to create the new segment of flash in the linkescriptr and place the data there. It is the safest.
If do not know linker scripts create a table of the one segment size, and place it at the end of the flash using your compiler directives.
If you do not know both I suggest a ready made STM eeprom emulation example from the Cube

Erasing my Flash on STM32L1

I'm facing a problem with my erasing function.
I try to erase 15 sectors of my Flash in order to put a new binary file.
I don't understand why but my function freeze and i cannot erase all memory i need.
Here is my code if you want to give a try
/*
* bootloader.c
*
* Created on: 9 juin 2015
* Author: tgloaguen
*/
#include "usart.h"
#include "stm32l1xx_flash.h"
#define WRITE_START_ADDR 0x08000000
#define WRITE_END_ADDR 0x0800FFFF
#define FLASH_PAGE_SIZE ((uint16_t)0x100) //If a page is 256 bits
#define MY_BL_FUNCTIONS __attribute__((section(".bootsection")))
void BootLoader(void) MY_BL_FUNCTIONS;
FLASH_Status Flash_Write ( uint32_t StartAddress, uint8_t *p, uint32_t Size ) MY_BL_FUNCTIONS;
uint8_t Flash_Erase() MY_BL_FUNCTIONS;
void Receive_Data(char * buffer,int size)MY_BL_FUNCTIONS;
void Receive_Size(char * buffer, int *sizeData)MY_BL_FUNCTIONS;
void BootLoader(void) {
//clear all ITs
USART_ITConfig( USART1, USART_IT_RXNE, DISABLE );
//SendString("HELLO",USART2);
uint8_t status,i;
char buffer[33];
//en dur
uint16_t *adr = WRITE_START_ADDR;
uint16_t sizeBin = 51400,k = 0,k_hexa = 0x20;
SendString("BOOTLOADER",USART2);
Flash_Erase();
SendString("ERASEOK",USART2);
//if sizeBin ok
}
and the erase function
uint8_t Flash_Erase() {
uint32_t EraseCounter = 0x00, Address = 0x00;//Erase count, write address
uint32_t NbrOfPage = 0x00;//Recording to erase the pages
volatile FLASH_Status FLASHStatus = FLASH_COMPLETE;/*FLASH complete erasure marks*/
/*Unlock FLASH*/
FLASH_Unlock();
/*Calculate the number of FLASH pages need to erase */
NbrOfPage = (WRITE_END_ADDR - WRITE_START_ADDR) / FLASH_PAGE_SIZE;
/* Remove all hang flags */
FLASH_ClearFlag ( FLASH_FLAG_EOP |
FLASH_FLAG_WRPERR |
FLASH_FLAG_PGAERR |
FLASH_FLAG_SIZERR |
FLASH_FLAG_OPTVERR );
/* Erase the FLASH page*/
for(EraseCounter = 0; (EraseCounter <NbrOfPage) && (FLASHStatus == FLASH_COMPLETE); EraseCounter++)
{
SendString("ok |",USART2);
FLASHStatus = FLASH_ErasePage(WRITE_START_ADDR + (FLASH_PAGE_SIZE * EraseCounter));
}
FLASH_Lock ( );
return (uint8_t)FLASHStatus;
}
In your code you are tring to erase the whole flash, but you are executing your bootloader from flash from final sectors.
As reported by th REF man
During a write/erase operation to the NVM (except Half Page programming or Double-word
erase/write), any attempt to read the same bank of NVM stalls the bus.
Then you have to preserve the space of bootloader changing WRITE_END_ADDR define according to your memory map.
Example: if your bootloader is 4K long and belongs to the last sector (0x0801 F000 - 0x0801 FFFF) then WRITE_END_ADDR must be 0x0801 EFFF.
EDIT
As #Olaf wrote take care about your ISP (Initial Stack Pointer) and IPC (Initial Program Counter) that belong to first 8 bytes in your flash at address 0.

GameBoy compiler with system registers and interrupts

I have been spending a lot of time learning GameBoy programming, as I was already familiar with Z80 Assembly I wasn't afraid of jumping into using it. I would (of course) find it much more productive to program in C or C++ however cannot find a full compiler for the GameBoy, the compilers I can find manage everything themselves and do not give access to system registers to the programmer and also have some horrible drawbacks such as 100% CPU utilization and no interrupt support.
Would it be possible to address system registers much like Arduino's AVR compiler? being able to address a CPU or system register simply with its name such as DDRD = %10101011
What would I have to do to add interrupts and system registers into a compiler? All but one system register are only one byte memory addresses and interrupts vectors are of course memory locations, the only one system register that isn't a memory address can only be modified with two Assembly instructions EI and DI but that could be inline functions correct?
The usual tactic is to create your own pointers to system registers. I don't know the address of DDRD, but something like this should to the trick:
volatile unsigned char *reg_DDRD = (unsigned char *)0xE000;
*reg_DDRD = 0xAB;
Most C compilers don't support binary constants but you can use them with some macro hackery. And you can use macros to make slightly more intuitive syntax:
#define DDRD (*reg_DDRD)
DDRD = 0xAB;
There's no sense in modifying the compiler when vanilla C code can work just as well.
Handling interrupts comes down to solving 3 problems. The first is to have the interrupt vector address do a jump to a C function. As that is in ROM you'll need to modify the C runtime environment to initialize that. This gets pretty system dependent, but usually what you'll want to do is add an assembly language file that looks like this:
org 38h ; or wherever the gameboy CPU jumps to on interrupt
jp _intr_function
This should cause the CPU to go to intr_function() in your C program. You may or may not need the leading underscore. And you may not be able to set the destination address in the assembler file with org but instead have to fool around with the linker and sections.
The second problem is that the C function won't necessarily save all the registers it should. You can do this by adding in-line assembly to it, along these lines:
void intr_function()
{
asm(" push af");
asm(" push bc");
asm(" push de");
asm(" push hl");
// ... now do what you like here.
asm(" pop hl");
asm(" pop de");
asm(" pop bc");
asm(" pop af");
}
Finally, the interrupt may have to be acknowledged by manipulating a hardware register. But you can do that in the C code so nothing special about that.
I'm not clear about the issue with wait loops. Standard C compilers don't have such a feature built in. They call main() and if you want to loop it is up to you. It is true that the C-like language used in the Arduino SDK has its own built-in infinite loop that calls the functions you write, but that's not normal C.
First off, you can use GBDK, which is a C compiler and library for the Gameboy. It does provide access to the registers in gb/hardware.h (but that isn't listed in the doc file, since there's no comment for each individual register). It also supplies access to interrupts via methods in gb/gb.h: add_VBL, add_LCD, add_TIM, add_SIO, and add_JOY. (There's also remove methods named remove_).
For reference and/or your own use, here's the contents of gb/hardware.h:
#define __REG volatile UINT8 *
#define P1_REG (*(__REG)0xFF00) /* Joystick: 1.1.P15.P14.P13.P12.P11.P10 */
#define SB_REG (*(__REG)0xFF01) /* Serial IO data buffer */
#define SC_REG (*(__REG)0xFF02) /* Serial IO control register */
#define DIV_REG (*(__REG)0xFF04) /* Divider register */
#define TIMA_REG (*(__REG)0xFF05) /* Timer counter */
#define TMA_REG (*(__REG)0xFF06) /* Timer modulo */
#define TAC_REG (*(__REG)0xFF07) /* Timer control */
#define IF_REG (*(__REG)0xFF0F) /* Interrupt flags: 0.0.0.JOY.SIO.TIM.LCD.VBL */
#define NR10_REG (*(__REG)0xFF10) /* Sound register */
#define NR11_REG (*(__REG)0xFF11) /* Sound register */
#define NR12_REG (*(__REG)0xFF12) /* Sound register */
#define NR13_REG (*(__REG)0xFF13) /* Sound register */
#define NR14_REG (*(__REG)0xFF14) /* Sound register */
#define NR21_REG (*(__REG)0xFF16) /* Sound register */
#define NR22_REG (*(__REG)0xFF17) /* Sound register */
#define NR23_REG (*(__REG)0xFF18) /* Sound register */
#define NR24_REG (*(__REG)0xFF19) /* Sound register */
#define NR30_REG (*(__REG)0xFF1A) /* Sound register */
#define NR31_REG (*(__REG)0xFF1B) /* Sound register */
#define NR32_REG (*(__REG)0xFF1C) /* Sound register */
#define NR33_REG (*(__REG)0xFF1D) /* Sound register */
#define NR34_REG (*(__REG)0xFF1E) /* Sound register */
#define NR41_REG (*(__REG)0xFF20) /* Sound register */
#define NR42_REG (*(__REG)0xFF21) /* Sound register */
#define NR43_REG (*(__REG)0xFF22) /* Sound register */
#define NR44_REG (*(__REG)0xFF23) /* Sound register */
#define NR50_REG (*(__REG)0xFF24) /* Sound register */
#define NR51_REG (*(__REG)0xFF25) /* Sound register */
#define NR52_REG (*(__REG)0xFF26) /* Sound register */
#define LCDC_REG (*(__REG)0xFF40) /* LCD control */
#define STAT_REG (*(__REG)0xFF41) /* LCD status */
#define SCY_REG (*(__REG)0xFF42) /* Scroll Y */
#define SCX_REG (*(__REG)0xFF43) /* Scroll X */
#define LY_REG (*(__REG)0xFF44) /* LCDC Y-coordinate */
#define LYC_REG (*(__REG)0xFF45) /* LY compare */
#define DMA_REG (*(__REG)0xFF46) /* DMA transfer */
#define BGP_REG (*(__REG)0xFF47) /* BG palette data */
#define OBP0_REG (*(__REG)0xFF48) /* OBJ palette 0 data */
#define OBP1_REG (*(__REG)0xFF49) /* OBJ palette 1 data */
#define WY_REG (*(__REG)0xFF4A) /* Window Y coordinate */
#define WX_REG (*(__REG)0xFF4B) /* Window X coordinate */
#define KEY1_REG (*(__REG)0xFF4D) /* CPU speed */
#define VBK_REG (*(__REG)0xFF4F) /* VRAM bank */
#define HDMA1_REG (*(__REG)0xFF51) /* DMA control 1 */
#define HDMA2_REG (*(__REG)0xFF52) /* DMA control 2 */
#define HDMA3_REG (*(__REG)0xFF53) /* DMA control 3 */
#define HDMA4_REG (*(__REG)0xFF54) /* DMA control 4 */
#define HDMA5_REG (*(__REG)0xFF55) /* DMA control 5 */
#define RP_REG (*(__REG)0xFF56) /* IR port */
#define BCPS_REG (*(__REG)0xFF68) /* BG color palette specification */
#define BCPD_REG (*(__REG)0xFF69) /* BG color palette data */
#define OCPS_REG (*(__REG)0xFF6A) /* OBJ color palette specification */
#define OCPD_REG (*(__REG)0xFF6B) /* OBJ color palette data */
#define SVBK_REG (*(__REG)0xFF70) /* WRAM bank */
#define IE_REG (*(__REG)0xFFFF) /* Interrupt enable */
These are done in the same way as George Phillips's answer, and thus can be used like normal variables.
The code that is used by GBDK to add and remove interrupts is found in libc\gb\crt0.s, but I don't know enough of assembly to include the relevant sections in this post.
I'm not sure about how to avoid the busy loop either.

reading ARM 9g20 GPIO using mmap wont work

I'm trying access the GPIO pins on Atmel's Arm9 9g20. My code below keeps getting failing at
gpio = mmap(0, getpagesize(), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0xFFFFF400); // start of GPIOA
Could someone help me with my code and offer a bit of I/O example code to get me past this hump? Thanks.
// gpio.c
// compile arm-linux-gcc -o button button.c
//
#include<unistd.h>
#include<sys/types.h>
#include<sys/mman.h>
#include<stdio.h>
#include<fcntl.h>
#include<string.h>
// GPIO Registers
//http://www.atmel.com/dyn/resources/prod_documents/doc6384.pdf - page 374
#define PIO_PER 0x0000 // PIO Enable Register Write-only –
#define PIO_PDR 0x0004 // PIO Disable Register Write-only –
#define PIO_PSR 0x0008 // PIO Status Register Read-only
#define PIO_OER 0x0010 // Output Enable Register Write-only –
#define PIO_ODR 0x0014 // Output Disable Register Write-only –
#define PIO_OSR 0x0018 // Output Status Register Read-only. reset 0x0000 0000
//0x001C Reserved
#define PIO_IFER 0x0020 // Glitch Input Filter Enable Register Write-only –
#define PIO_IFDR 0x0024 // Glitch Input Filter Disable Register Write-only –
#define PIO_IFSR 0x0028 // Glitch Input Filter Status Register Read-only. Reset 0x0000 0000
//0x002C Reserved
#define PIO_SODR 0x0030 // Set Output Data Register Write-only –
#define PIO_CODR 0x0034 // Clear Output Data Register Write-only
#define PIO_ODSR 0x0038 // Output Data Status Register Read-only or Read-write
#define PIO_PDSR 0x003C // Pin Data Status Register Read-only
#define PIO_IER 0x0040 // Interrupt Enable Register Write-only –
#define PIO_IDR 0x0044 // Interrupt Disable Register Write-only –
#define PIO_IMR 0x0048 // Interrupt Mask Register Read-only. Reset 0x00000000
#define PIO_ISR 0x004C // Interrupt Status Register Read-only. Reset 0x00000000
#define PIO_MDER 0x0050 // Multi-driver Enable Register Write-only –
#define PIO_MDDR 0x0054 // Multi-driver Disable Register Write-only –
#define PIO_MDSR 0x0058 // Multi-driver Status Register Read-only. Reset 0x00000000
//0x005C Reserved
#define PIO_PUDR 0x0060 // Pull-up Disable Register Write-only –
#define PIO_PUER 0x0064 // Pull-up Enable Register Write-only –
#define PIO_PUSR 0x0068 // Pad Pull-up Status Register
#define PIO_ASR 0x0070 // Peripheral A Select Register Write-only –
#define PIO_BSR 0x0074 // Peripheral B Select Register Write-only –
#define PIO_ABSR 0x0078 // AB Status Register Read-only 0x00000000
//0x007C to 0x009C Reserved
#define PIO_OWER 0x00A0 // Output Write Enable Write-only –
#define PIO_OWDR 0x00A4 // Output Write Disable Write-only –
#define PIO_OWSR 0x00A8 // Output Write Status Register Read-only 0x00000000
/*******************************************************************************************************
* MAIN
*******************************************************************************************************/
int main(int argc, char **argv) {
volatile unsigned int *PADR, *PADDR, *PBDR, *PBDDR, *PCDR, *PCDDR;
unsigned long *gpio;
int fd = open("/dev/mem", O_RDWR|O_SYNC);
if (fd < 0){
fprintf(stderr, "Unable to open port\n\r");
exit(fd);
}
gpio = mmap(0, getpagesize(), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0xFFFFF400); // start of GPIOA
if(gpio == (void *) -1) {
printf("Memory map failed.\n");
exit(0);
} else {
printf("Memory mapped at address %p.\n", gpio);
}
PADR = (unsigned int *)(gpio + 0x00); // port a
PADDR = (unsigned int *)(gpio + PIO_OER); // port a output enable
*PADDR = 0xff; // make all output
*PADR = 0xffff; // turn All of A Off
close(fd);
return 0;
}
Not sure what type of failings you are experiencing (they are not described) but operations with GPIO port and comments are not correct. First of all, register PIO_PER is IO enable register, setting bits is not making them output but enabling. On the other hand, PIO_OER is indeed for making them output and not turning all off. So, you should stick to the following sequence:
// initializing
*(unsigned int *) (gpio + PIO_PER) = 0xff; // enable
*(unsigned int *) (gpio + PIO_OER) = 0xff; // set output
// working
...
*(unsigned int *) (gpio + PIO_SODR) = 0xff; // set 1's
...
*(unsigned int *) (gpio + PIO_CODR) = 0xff; // set 0's
UPDATE
Since only whole pages can be mapped, you should take that into consideration:
#define MAP_SIZE 4096UL
#define MAP_MASK (MAP_SIZE - 1)
#define GPIOA_BASE 0xFFFFF400
...
/* Map one page */
map_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, GPIOA_BASE & & ~MAP_MASK);
...
gpio = map_base + (GPIOA_BASE & MAP_MASK);
...
Check the sources of well-known devmem tool: here

Resources