Linux kernel: zap_page_range() duration - c

zap_page_range is defined in mm/memory.c. Here is how it is defined:
/*
 * remove user pages in a given range.
 *
 * NOTE(review): this is 2.4-era kernel code; the modern function has a
 * different signature. Regarding the question below: yes, total work
 * scales with `size` — the do/while walks the range one PGDIR_SIZE
 * chunk at a time and zap_pmd_range() is called for every chunk, so a
 * larger range takes longer, roughly in proportion to the number of
 * mapped pages it covers.
 */
void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
mmu_gather_t *tlb;
pgd_t * dir;
unsigned long start = address, end = address + size;
int freed = 0;
dir = pgd_offset(mm, address);
/*
 * This is a long-lived spinlock. That's fine.
 * There's no contention, because the page table
 * lock only protects against kswapd anyway, and
 * even if kswapd happened to be looking at this
 * process we _want_ it to get stuck.
 */
/* an empty or wrapped-around range is a caller bug */
if (address >= end)
BUG();
spin_lock(&mm->page_table_lock);
flush_cache_range(mm, address, end);
tlb = tlb_gather_mmu(mm);
do {
freed += zap_pmd_range(tlb, dir, address, end - address);
/* advance to the start of the next page-directory-sized chunk */
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
/* this will flush any remaining tlb entries */
tlb_finish_mmu(tlb, start, end);
/*
 * Update rss for the mm_struct (not necessarily current->mm)
 * Notice that rss is an unsigned long.
 */
if (mm->rss > freed)
mm->rss -= freed;
else
mm->rss = 0;
spin_unlock(&mm->page_table_lock);
}
For some reason, I want this function to be executed as long as possible. Can anyone tell me how to do this? If the size is really big, will it take longer?

Related

How to get the pagesize of a memory segment just from a virtual address?

Linux can have both standard 4KiB page memory and 1GiB (huge) paged memory (and 2MiB pages, but I don't know if anyone uses that).
Is there a standard call to get the page size from an arbitrary virtual address? The pointer could be pointing to 4K pages or huge pages.
The problem at hand is to sanity (assert(...)) check arguments to a function that requires the base address and size of the region needs to be multiples of the page size, to be handed to mbind. But the page size varies on the system. Without sanity checking, the return value of mbind just gives Invalid argument which is not helpful for debugging.
I've looked at this answer How to get linux kernel page size programmatically but it gives answers that assume that the entire system is the same, and they are also compile time constants. Also getpagesize() does the same and it is deprecated anyways.
This is related to the MMU, see https://unix.stackexchange.com/questions/128213/how-is-page-size-determined-in-virtual-address-space and normally the page size is equal for the entire system / kernel, it is determined during kernel compilation
I realize one way of doing it is to scrape /proc/self/maps and somewhere in there is a keyword that indicates if a memory range has huge pages or not. I don't know how portable that is (man page for /proc doesn't say what it is, but I've seen it). But that seems a heavy handed way of just looking up a pointer to get the page size. And then on top of that, I don't think it indicates page size, just whether or not it is "huge pages".
I am not 100% sure I understand your requirements correctly, but I'll give it a try.
There is an interesting function posted here by user Ciro Santilli, pagemap_get_entry. It uses the /proc/[pid]/pagemap interface to get the page table entry (pte) that corresponds to the virtual address you give as input. From the pte, you get the pfn (physical frame number) where the virtual address is mapped. Having this function, we can use the following logic to find out if a virtual address is mapped to 4K, 2M or 1G physical page:
First, get the address of the 1G virtual page where the virtual address of interest belongs. Call pagemap_get_entry with that virtual address and if the returned pfn is 2^18-aligned, then assume we are on a 1G physical page (2^18 is used because we assume the size of a physical frame to be 4K = 2^12 bytes, and 2^18 * 2^12 = 2^30 = 1GiB).
Else, get the address of the 2M virtual page inside which the virtual address falls. Call pagemap_get_entry with that and if the returned pfn is 2^9-aligned, then assume we are inside a 2M physical page (again, 2^9 * 2^12 = 2^21 = 2MiB).
Else, assume that virtual address is mapped in RAM with 4K physical page.
With code, i hope it would be something like that (part of linked post is reposted here for completeness):
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* open */
#include <stdint.h> /* uint64_t */
#include <stdio.h> /* printf */
#include <stdlib.h> /* size_t, malloc */
#include <unistd.h> /* pread, sysconf, getpid */
#include <sys/types.h> /* getpid */
#include <string.h> /* memset */
typedef struct {
    uint64_t pfn : 55;            /* bits 0-54: physical frame number */
    unsigned int soft_dirty : 1;  /* bit 55: pte soft-dirty flag */
    unsigned int file_page : 1;   /* bit 61: file-mapped / shared-anon */
    unsigned int swapped : 1;     /* bit 62: page is swapped out */
    unsigned int present : 1;     /* bit 63: page is present in RAM */
} PagemapEntry;

/* Parse the pagemap entry for the given virtual address.
 *
 * Each virtual page has one 8-byte record in /proc/pid/pagemap at
 * offset (vaddr / PAGE_SIZE) * 8; this reads that record and unpacks
 * its bit fields.
 *
 * @param[out] entry      the parsed entry
 * @param[in]  pagemap_fd file descriptor to an open /proc/pid/pagemap file
 * @param[in]  vaddr      virtual address to get entry for
 * @return 0 for success, 1 for failure
 */
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
    size_t nread;
    ssize_t ret;
    uint64_t data;
    uintptr_t vpn;

    vpn = vaddr / sysconf(_SC_PAGE_SIZE);
    nread = 0;
    while (nread < sizeof(data)) {
        ret = pread(pagemap_fd, ((uint8_t *)&data) + nread, sizeof(data) - nread,
                    vpn * sizeof(data) + nread);
        /* Check BEFORE accumulating: the original did `nread += ret`
         * first, folding a -1 error return into the byte count. */
        if (ret <= 0) {
            return 1;
        }
        nread += ret;
    }
    entry->pfn = data & (((uint64_t)1 << 55) - 1);
    entry->soft_dirty = (data >> 55) & 1;
    entry->file_page = (data >> 61) & 1;
    entry->swapped = (data >> 62) & 1;
    entry->present = (data >> 63) & 1;
    return 0;
}
int main()
{
/* candidate page sizes whose alignment we test the pfn against */
unsigned long long PAGE_SIZE_1G = 1024*1024*1024;
unsigned long long PAGE_SIZE_2M = 2*1024*1024;
unsigned long long PAGE_SIZE_4K = 4*1024;
uint64_t pfn_1g, pfn_2m, pfn_4k, pfn_original;
/* 4 GiB buffer so it can plausibly span huge pages;
 * NOTE(review): requires a 64-bit system and enough memory/overcommit */
char * arr = (char *)malloc(4*PAGE_SIZE_1G * sizeof(char));
if (arr == NULL) {
printf("malloc\n");
return 1;
}
/* touch every byte so the kernel actually populates the mapping */
memset(arr, 1, 4*PAGE_SIZE_1G);
uintptr_t vaddr = (uintptr_t)arr + 1024*1025*1026; // get a random virtual address
PagemapEntry entry;
/* round vaddr down to each candidate page boundary */
uintptr_t vaddr_1g_aligned = vaddr & ~(PAGE_SIZE_1G - 1);
uintptr_t vaddr_2m_aligned = vaddr & ~(PAGE_SIZE_2M - 1);
uintptr_t vaddr_4k_aligned = vaddr & ~(PAGE_SIZE_4K - 1);
printf("Virtual address of interest %jx\n", (uintmax_t) vaddr);
printf("1G-aligned virtual address %jx\n", (uintmax_t) vaddr_1g_aligned);
printf("2M-aligned virtual address %jx\n", (uintmax_t) vaddr_2m_aligned);
printf("4K-aligned virtual address %jx\n", (uintmax_t) vaddr_4k_aligned);
char pagemap_file[BUFSIZ];
int pagemap_fd;
pid_t pid = getpid();
/* needs root (or CAP_SYS_ADMIN) to read pfn values from pagemap */
snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
pagemap_fd = open(pagemap_file, O_RDONLY);
if (pagemap_fd < 0) {
return 1;
}
/* look up the pfn of each aligned address plus the original one */
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_1g_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_1g = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_2m_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_2m = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr_4k_aligned)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_4k = entry.pfn;
if (pagemap_get_entry(&entry, pagemap_fd, vaddr)) {
printf("pagemap_get_entry\n");
return 1;
}
pfn_original = entry.pfn;
printf("pfn of 1G-alignment: %jx\n", (uintmax_t) pfn_1g);
printf("pfn of 2M-alignment: %jx\n", (uintmax_t) pfn_2m);
printf("pfn of 4K-alignment: %jx\n", (uintmax_t) pfn_4k);
printf("pfn of original address: %jx\n", (uintmax_t) pfn_original);
/* Heuristic (see the note in the answer text): a pfn that is
 * 2^18-aligned (resp. 2^9-aligned) is *probably* the start of a
 * 1G (resp. 2M) physical page — alignment alone is not a guarantee. */
if ((pfn_1g != 0) && (pfn_1g % (1 << 18) == 0)) {
printf("Virtual address is mapped to 1G physical page\n");
}
else if ((pfn_2m != 0) && (pfn_2m % (1 << 9) == 0)) {
printf("Virtual address is mapped to 2M physical page\n");
}
else {
printf("Virtual address is mapped to 4K physical page\n");
}
return 0;
}
As original poster explains, you have to run this program with sudo, because of read access to /proc/<pid>/pagemap.
In my system that supports only 2M and 4K page sizes, i get the followings:
root#debian # cat /sys/kernel/mm/transparent_hugepages/enabled
always madvise [never]
root#debian # ./physical_page_size
Virtual address of interest 7f4f9d01a810
1G-aligned virtual address 7f4f80000000
2M-aligned virtual address 7f4f9d000000
4K-aligned virtual address 7f4f9d01a000
pfn of 1G-alignment: 1809fa
pfn of 2M-alignment: 1639fa
pfn of 4K-alignment: 163a14
pfn of original address: 163a14
Virtual address is mapped to 4K physical page
root#debian # echo "always" > /sys/kernel/mm/transparent_hugepages/enabled
root#debian # ./physical_page_size
Virtual address of interest 7f978d0d2810
1G-aligned virtual address 7f9780000000
2M-aligned virtual address 7f978d000000
4K-aligned virtual address 7f978d0d2000
pfn of 1G-alignment: 137a00
pfn of 2M-alignment: 145a00
pfn of 4K-alignment: 145ad2
pfn of original address: 145ad2
Virtual address is mapped to 2M physical page
Also, i have to mention that when the program reports 1G or 2M physical page size, it is not guaranteed that this is the case, however is very highly possible.
Finally, i see that your problem is with mbind. Again, i am not sure i understand it correctly or if this is a valid suggestion, but maybe you could try all possible page sizes starting from smallest until the call succeeds.
/*
 * Try mbind() with progressively larger page-size alignments until one
 * succeeds: 4K first, then 2M, then 1G.
 *
 * Returns 0 as soon as one mbind() call succeeds, 1 if all fail.
 *
 * NOTE(review): the mbind argument lists are elided ("...") in the
 * original answer; supply mode/nodemask/flags for real use.
 */
int wrapper(void *start, unsigned long size)
{
    unsigned long long PAGE_SIZE_4K = 4*1024;
    unsigned long long PAGE_SIZE_2M = 2*1024*1024;
    unsigned long long PAGE_SIZE_1G = 1024*1024*1024;
    /* round the base address DOWN to each page-size boundary */
    void *start_4k = (void *)((unsigned long) start & ~(PAGE_SIZE_4K-1));
    void *start_2m = (void *)((unsigned long) start & ~(PAGE_SIZE_2M-1));
    void *start_1g = (void *)((unsigned long) start & ~(PAGE_SIZE_1G-1));
    /* Round the length UP to each page-size multiple. Initialize to
     * `size` so the values are defined when size is already a multiple:
     * the original left them uninitialized in that case, which is
     * undefined behavior when they are later passed to mbind(). */
    unsigned long size_4k = size, size_2m = size, size_1g = size;
    if (size % PAGE_SIZE_4K != 0) {
        size_4k = size - (size % PAGE_SIZE_4K) + PAGE_SIZE_4K;
    }
    if (size % PAGE_SIZE_2M != 0) {
        size_2m = size - (size % PAGE_SIZE_2M) + PAGE_SIZE_2M;
    }
    if (size % PAGE_SIZE_1G != 0) {
        size_1g = size - (size % PAGE_SIZE_1G) + PAGE_SIZE_1G;
    }
    if (mbind(start_4k, size_4k, .....) == 0) {
        return 0;
    }
    if (mbind(start_2m, size_2m, .....) == 0) {
        return 0;
    }
    if (mbind(start_1g, size_1g, .....) == 0) {
        return 0;
    }
    return 1;
}

What is the use of the type userptr_t in os161?

I'm trying to complete an assignment for an operating systems course, Here.
I had a question from the assignment:
What is the purpose of userptr_t?
When I searched the source code for userptr_t, Here, I found this:
/*
 * Define userptr_t as a pointer to a one-byte struct, so it won't mix
 * with other pointers.
 *
 * This is a "strong typedef": since struct __userptr is a distinct
 * type, the compiler rejects accidental conversion between userptr_t
 * and ordinary kernel pointers (void *, char *, ...), which forces
 * user-supplied addresses through the checked copyin/copyout helpers
 * before being dereferenced.
 */
struct __userptr { char _dummy; };
typedef struct __userptr *userptr_t;
typedef const struct __userptr *const_userptr_t;
I can't get to understand completely the use of it, can anyone explain what is the purpose of this type?
It's used here for example in the file copyinout.c in the functions copyin, copyout, copyinstr, copyoutstr and other functions:
#include <types.h>
#include <kern/errno.h>
#include <lib.h>
#include <setjmp.h>
#include <thread.h>
#include <current.h>
#include <vm.h>
#include <copyinout.h>
/*
* User/kernel memory copying functions.
*
* These are arranged to prevent fatal kernel memory faults if invalid
* addresses are supplied by user-level code. This code is itself
* machine-independent; it uses the machine-dependent C setjmp/longjmp
* facility to perform recovery.
*
* However, it assumes things about the memory subsystem that may not
* be true on all platforms.
*
* (1) It assumes that user memory is mapped into the current address
* space while running in the kernel, and can be accessed by just
* dereferencing a pointer in the ordinary way. (And not, for example,
* with special instructions or via special segment registers.)
*
* (2) It assumes that the user-space region of memory is contiguous
* and extends from 0 to some virtual address USERSPACETOP, and so if
* a user process passes a kernel address the logic in copycheck()
* will trap it.
*
* (3) It assumes that access to user memory from the kernel behaves
* the same way as access to user memory from user space: for
* instance, that the processor honors read-only bits on memory pages
* when in kernel mode.
*
* (4) It assumes that if a proper user-space address that is valid
* but not present, or not valid at all, is touched from the kernel,
* that the correct faults will occur and the VM system will load the
* necessary pages and whatnot.
*
* (5) It assumes that the machine-dependent trap logic provides and
* honors a tm_badfaultfunc field in the thread_machdep structure.
* This feature works as follows: if an otherwise fatal fault occurs
* in kernel mode, and tm_badfaultfunc is set, execution resumes in
* the function pointed to by tm_badfaultfunc.
*
* This code works by setting tm_badfaultfunc and then copying memory
* in an ordinary fashion. If these five assumptions are satisfied,
* which is the case for many ordinary CPU types, this code should
* function correctly. If the assumptions are not satisfied on some
* platform (for instance, certain old 80386 processors violate
* assumption 3), this code cannot be used, and cpu- or platform-
* specific code must be written.
*
* To make use of this code, in addition to tm_badfaultfunc the
* thread_machdep structure should contain a jmp_buf called
* "tm_copyjmp".
*/
/*
 * Recovery function. If a fatal fault occurs during copyin, copyout,
 * copyinstr, or copyoutstr, execution resumes here. (This behavior is
 * caused by setting t_machdep.tm_badfaultfunc and is implemented in
 * machine-dependent code.)
 *
 * We use the C standard function longjmp() to teleport up the call
 * stack to where setjmp() was called. At that point we return EFAULT.
 *
 * Never returns normally: control transfers straight back to the
 * setjmp() site in whichever copy function armed tm_badfaultfunc.
 */
static
void
copyfail(void)
{
longjmp(curthread->t_machdep.tm_copyjmp, 1);
}
/*
 * Memory region check function.
 *
 * Verifies that a user-supplied block of memory (base address plus
 * length) lies inside the legal userspace region, which runs from 0
 * through USERSPACETOP-1. Returns EFAULT if it does not.
 *
 * On success *stoplen receives the maximum length that may actually
 * be copied; this differs from len if and only if the region
 * partially overlaps the kernel.
 */
static
int
copycheck(const_userptr_t userptr, size_t len, size_t *stoplen)
{
	vaddr_t first, last;

	*stoplen = len;
	first = (vaddr_t) userptr;
	last = first + len - 1;

	if (last < first) {
		/* address arithmetic wrapped around */
		return EFAULT;
	}
	if (first >= USERSPACETOP) {
		/* whole region lies in kernel space */
		return EFAULT;
	}
	if (last >= USERSPACETOP) {
		/* region runs into the kernel: clamp the usable length */
		*stoplen = USERSPACETOP - first;
	}
	return 0;
}
/*
 * copyin
 *
 * Copy a block of memory of length LEN from user-level address USERSRC
 * to kernel address DEST. We can use memcpy because it's protected by
 * the tm_badfaultfunc/copyfail logic.
 *
 * Returns 0 on success; EFAULT if the source region is outside (or
 * partially outside) userspace, or if a fault occurs during the copy.
 */
int
copyin(const_userptr_t usersrc, void *dest, size_t len)
{
int result;
size_t stoplen;
result = copycheck(usersrc, len, &stoplen);
if (result) {
return result;
}
if (stoplen != len) {
/* Single block, can't legally truncate it. */
return EFAULT;
}
/* Arm the fault-recovery path BEFORE touching user memory: a fault
 * inside memcpy longjmps back to the setjmp below, which returns 1. */
curthread->t_machdep.tm_badfaultfunc = copyfail;
result = setjmp(curthread->t_machdep.tm_copyjmp);
if (result) {
/* we got here via copyfail(): a fault occurred during the copy */
curthread->t_machdep.tm_badfaultfunc = NULL;
return EFAULT;
}
memcpy(dest, (const void *)usersrc, len);
/* disarm recovery before returning to normal kernel execution */
curthread->t_machdep.tm_badfaultfunc = NULL;
return 0;
}
/*
 * copyout
 *
 * Copy a block of memory of length LEN from kernel address SRC to
 * user-level address USERDEST. We can use memcpy because it's
 * protected by the tm_badfaultfunc/copyfail logic.
 *
 * Returns 0 on success; EFAULT if the destination region is outside
 * (or partially outside) userspace, or if a fault occurs during the
 * copy.
 */
int
copyout(const void *src, userptr_t userdest, size_t len)
{
int result;
size_t stoplen;
result = copycheck(userdest, len, &stoplen);
if (result) {
return result;
}
if (stoplen != len) {
/* Single block, can't legally truncate it. */
return EFAULT;
}
/* arm fault recovery before touching user memory (see copyin) */
curthread->t_machdep.tm_badfaultfunc = copyfail;
result = setjmp(curthread->t_machdep.tm_copyjmp);
if (result) {
/* reached via copyfail(): a fault occurred during the copy */
curthread->t_machdep.tm_badfaultfunc = NULL;
return EFAULT;
}
memcpy((void *)userdest, src, len);
/* disarm recovery before returning */
curthread->t_machdep.tm_badfaultfunc = NULL;
return 0;
}
/*
 * Shared string-copying helper for copyinstr and copyoutstr.
 *
 * Copies a null-terminated string of at most MAXLEN bytes (terminator
 * included) from SRC to DEST. On success, if GOTLEN is non-null it
 * receives the copied length, including the terminator.
 *
 * STOPLEN is the limit derived from copycheck(). Hitting MAXLEN first
 * means the string is simply too long (ENAMETOOLONG); hitting STOPLEN
 * first means the string ran into the end of userspace (EFAULT).
 */
static
int
copystr(char *dest, const char *src, size_t maxlen, size_t stoplen,
size_t *gotlen)
{
	size_t pos;

	for (pos = 0; pos < maxlen && pos < stoplen; pos++) {
		dest[pos] = src[pos];
		if (src[pos] == 0) {
			/* found the terminator: report length, succeed */
			if (gotlen != NULL) {
				*gotlen = pos + 1;
			}
			return 0;
		}
	}
	/* no terminator within the smaller of the two limits */
	return (stoplen < maxlen) ? EFAULT : ENAMETOOLONG;
}
/*
 * copyinstr
 *
 * Copy a string from user-level address USERSRC to kernel address
 * DEST, as per copystr above. Uses the tm_badfaultfunc/copyfail
 * logic to protect against invalid addresses supplied by a user
 * process.
 *
 * Returns 0 on success, EFAULT for bad addresses or faults, or
 * ENAMETOOLONG if the string does not fit in LEN bytes.
 */
int
copyinstr(const_userptr_t usersrc, char *dest, size_t len, size_t *actual)
{
int result;
size_t stoplen;
result = copycheck(usersrc, len, &stoplen);
if (result) {
return result;
}
/* arm fault recovery before copystr touches user memory */
curthread->t_machdep.tm_badfaultfunc = copyfail;
result = setjmp(curthread->t_machdep.tm_copyjmp);
if (result) {
/* reached via copyfail(): a fault occurred during the copy */
curthread->t_machdep.tm_badfaultfunc = NULL;
return EFAULT;
}
result = copystr(dest, (const char *)usersrc, len, stoplen, actual);
/* disarm recovery; pass copystr's verdict (0/EFAULT/ENAMETOOLONG) up */
curthread->t_machdep.tm_badfaultfunc = NULL;
return result;
}
/*
 * copyoutstr
 *
 * Copy a string from kernel address SRC to user-level address
 * USERDEST, as per copystr above. Uses the tm_badfaultfunc/copyfail
 * logic to protect against invalid addresses supplied by a user
 * process.
 *
 * Returns 0 on success, EFAULT for bad addresses or faults, or
 * ENAMETOOLONG if the string does not fit in LEN bytes.
 */
int
copyoutstr(const char *src, userptr_t userdest, size_t len, size_t *actual)
{
int result;
size_t stoplen;
result = copycheck(userdest, len, &stoplen);
if (result) {
return result;
}
/* arm fault recovery before copystr touches user memory */
curthread->t_machdep.tm_badfaultfunc = copyfail;
result = setjmp(curthread->t_machdep.tm_copyjmp);
if (result) {
/* reached via copyfail(): a fault occurred during the copy */
curthread->t_machdep.tm_badfaultfunc = NULL;
return EFAULT;
}
result = copystr((char *)userdest, src, len, stoplen, actual);
/* disarm recovery; pass copystr's verdict (0/EFAULT/ENAMETOOLONG) up */
curthread->t_machdep.tm_badfaultfunc = NULL;
return result;
}
This looks like a strong typedef, i.e. a typedef intended to increase type safety by avoiding unintended uses/conversions of the wrapped data.
In your context, most likely intended to differentiate kernel pointers from user space pointers (usually mapped via the MMU).

retrieve information from a structure with ptrace

Here, I explain my problem, I am a beginner on the ptrace function and I would like to succeed in recovering the hard information of a structure.
For example, with the command strace -e trace=fstat ls, I will get
a line like: fstat(3, {st_mode=..., st_size=...})
and I would like to successfully retrieve the contents of the structure (st_mode) and (st_size).
I try this but to no avail:
/* Read `size` bytes of the tracee's memory at `addr` into `buffer`,
 * one word at a time via PTRACE_PEEKDATA.
 *
 * NOTE(review): as posted this code has several problems, which is
 * why the caller sees garbage —
 *  - declared int but never returns a value (UB if the result is used);
 *  - ptrace errors go undetected: PEEKDATA returns -1 both on error
 *    and as legitimate data, so errno must be cleared before the call
 *    and checked afterwards (see the corrected version below);
 *  - `buffer + byte` is pointer arithmetic on void *, a GNU extension.
 */
int buffer(unsigned long long addr, pid_t child, size_t size, void *buffer)
{
size_t byte = 0;
size_t data;
unsigned long tmp;
while (byte < size) {
tmp = ptrace(PTRACE_PEEKDATA, child, addr + byte);
/* data = how many bytes of this word still fit into the request */
if ((size - byte) / sizeof(tmp))
data = sizeof(tmp);
else
data = size % sizeof(tmp);
memcpy((void *)(buffer + byte), &tmp, data);
byte += data;
}
}
and in params :
struct stat stat_i;
buffer(addr, pid, sizeof(stat_i), &stat_i);
printf("%lu", stat_i.st_size); -> fake value :/
Thanks!
From the man page,
PTRACE_PEEKTEXT, PTRACE_PEEKDATA
Read a word at the address addr in the tracee's memory,
returning the word as the result of the ptrace() call. Linux
does not have separate text and data address spaces, so these
two requests are currently equivalent. (data is ignored; but
see NOTES.)
Thus you must understand that tmp would hold the actually value that was read.
Your checks are wrong - you should set errno = 0 before the call and then check if it has changed. If it has - you've got an error. If it hasn't - you can be assured that tmp has the word from the remote process.
Try something like this:
/* Copy `size` bytes from the traced child's address space, starting at
 * `addr`, into the local `buffer`, one word per PTRACE_PEEKDATA call.
 *
 * `size` must be a multiple of sizeof(long), since ptrace transfers
 * whole words. Returns 0 on success, -1 on an unsupported size or a
 * ptrace error.
 */
int buffer(unsigned long long addr, pid_t child, size_t size, void *buffer)
{
    size_t byte = 0;
    unsigned long tmp;
    long *buffer_int = (long *)buffer;

    /* support for word aligned sizes only */
    if (size % sizeof(long) != 0)
        return -1;
    while (byte < size) {
        /* PEEKDATA returns the word itself; -1 is a valid word value,
         * so errors are only detectable by clearing and checking errno */
        errno = 0;
        tmp = ptrace(PTRACE_PEEKDATA, child, addr + byte);
        if (errno)
            return -1;
        buffer_int[byte / sizeof(long)] = tmp;
        byte += sizeof(long);
    }
    /* the original fell off the end of a non-void function (UB when the
     * caller inspects the result); also dropped its unused `data` local */
    return 0;
}

C - Memory access with mmap

I have a hex file of 327680 characters which I'm writing to physical address 0x30000000 - 0x3004FFFF on the memory on my ARM linux system.
While reading back from the memory I'm getting a segfault after reading 64170 characters from the start address, ie at 0x3000FAAA.
If I change my starting address to 0x3000FA64, then also I get a segfault after 64170 characters.
How do I ensure data is accessed correctly if Data > 4kB (page size) ?
I'm unable to understand the exact problem, so I'm adding the snippet of my code below:
#define MAX_RANGE 327679
int fd;
FILE* fd_table=NULL;
unsigned long int count = 0 ;
void * mem;
void * aligned_vaddr;
unsigned long aligned_paddr;
uint32_t aligned_size;
/* NOTE(review): addr_phys is read below (g_paddr = addr_phys) before it
 * is ever assigned — the 0x30000000 base stored in g_paddr is
 * immediately overwritten by this uninitialized value. Presumably
 * addr_phys was meant to start at 0x30000000; verify against the full
 * program. */
unsigned long int addr_phys;
uint8_t *addr;
int g_size = 1;
unsigned long int g_paddr = 0x30000000; //Starting physical address
while((count<MAX_RANGE)){
g_paddr = addr_phys;
g_paddr &= ~(g_size - 1);
/* round the physical address down to a 4 KiB page boundary and the
 * mapping length up to a whole number of pages */
aligned_paddr = g_paddr & ~(4096 - 1);
aligned_size = g_paddr - aligned_paddr + (g_count * g_size);
aligned_size = (aligned_size + 4096 - 1) & ~(4096 - 1);
/* Align address to access size */
aligned_vaddr = mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, aligned_paddr);
/* NOTE(review): mmap reports failure with MAP_FAILED ((void *)-1),
 * not NULL, so this check never fires; a failed mapping is then
 * dereferenced below — a likely cause of the observed segfault.
 * Also, the loop mmaps once per byte and never munmaps, so virtual
 * address space is exhausted after enough iterations — presumably
 * why the crash appears at a fixed count regardless of start
 * address. Map the region once outside the loop (or munmap each
 * iteration) and compare against MAP_FAILED. */
if (aligned_vaddr == NULL) {
printf("Error mapping address\n");
close(fd);
return 1;
}
mem = (void *)((uint32_t)aligned_vaddr + (g_paddr - aligned_paddr));
addr = mem;
fprintf(fd_table, "%02X\n",addr[0]);
addr_phys +=1; //Increment byte address
count++;
}
Note:
1. There is no error in the write process, I have verified by viewing the segfault address with memtool.
2. The address 0x30000000 onwards is not used by the system (I have ensured that in the u-boot).

How the Average Cache Miss Ratio (ACMR) is calculated?

I'm studying Tom Forsyth's Linear-Speed Vertex Cache Optimization and i don't understand how he calculates the ACMR. From what i have read i already know that ACMR = number of cache misses / number of triangles, but what i don't understand is what kind of cache is being used (i.e. FIFO or LRU?).
I have written a test program that calculates and prints the ACMR of a given 3d model using a FIFO cache, can you please tell me if this code is ok? or should i use an LRU cache instead?
/* the number of entries in a FIFO cache */
#define FIFO_CACHE_SIZE 32
/* Simulated post-transform vertex cache. Slot 0 holds the most
 * recently added vertex index; -1 marks an empty slot (set by
 * init_cache), since valid indices are non-negative. */
struct fifo_cache {
long entries[FIFO_CACHE_SIZE];
};
/**
 * init_cache - initializes a FIFO cache
 * @cache: A pointer to the FIFO cache structure to be initialized.
 *
 * Marks every slot empty by storing -1, a value no vertex index can
 * take. Must be called before the cache is used.
 */
static void init_cache(struct fifo_cache *cache)
{
	size_t slot;

	for (slot = 0; slot < FIFO_CACHE_SIZE; slot++)
		cache->entries[slot] = -1;
}
/**
 * check_entry - tests whether an entry is already in the cache
 * @cache: A pointer to the FIFO cache structure to be searched.
 * @entry: An entry to be searched for.
 *
 * Return: nonzero if the entry was found, zero otherwise.
 */
static int check_entry(const struct fifo_cache *cache, u16 entry)
{
	const long needle = (long)entry;
	size_t slot;

	for (slot = 0; slot < FIFO_CACHE_SIZE; slot++) {
		if (cache->entries[slot] == needle)
			return 1;
	}
	return 0;
}
/**
 * add_entry - adds a new entry at the front of the FIFO cache
 * @cache: A pointer to the FIFO cache structure the entry will be added to.
 * @entry: An entry to add.
 *
 * Shifts every slot one position toward the back — the oldest entry
 * falls off the end — then stores the new entry in slot 0.
 */
static void add_entry(struct fifo_cache *cache, u16 entry)
{
	int slot;

	for (slot = FIFO_CACHE_SIZE - 1; slot > 0; slot--)
		cache->entries[slot] = cache->entries[slot - 1];
	cache->entries[0] = (long)entry;
}
/**
 * calculate_acmr - calculates the average cache miss ratio (aka. ACMR)
 * @indices: The list of vertex indices.
 * @count: The number of vertex indices in the @indices list.
 *
 * Replays the index stream through a simulated FIFO vertex cache and
 * returns misses per triangle (count/3 triangles, matching the
 * original's integer division).
 *
 * Return: the ACMR, or 0.0f when @count < 3 — the original divided by
 * zero (yielding inf) for index lists too short to form a triangle.
 */
float calculate_acmr(const u16 *indices, size_t count)
{
	struct fifo_cache cache = {0};
	long total = 0; /* the total number of cache misses */
	size_t i;      /* size_t, so the comparison with count is same-signed */

	if (count < 3)
		return 0.0f;
	/* initialize the cache */
	init_cache(&cache);
	for (i = 0; i < count; i++) {
		if (!check_entry(&cache, indices[i])) {
			/* miss: bring the index into the cache and count it */
			add_entry(&cache, indices[i]);
			total++;
		}
	}
	return ((float)total / (count / 3));
}
I found the answer. Modern GPUs use FIFO caches for simplicity and speed, so it makes sense to calculate the ACMR using a FIFO cache. The code given above is correct, so I'll keep using that.
You are correct that is the way hardware does it. Additionally you may want to read this: http://www.realtimerendering.com/blog/acmr-and-atvr/

Resources