Related
Is it possible to transfer data between threads like producer consumer using POSIX Message queue?
I need to transfer arrays of doubles, with 5000 elements each, from a producer thread to a consumer thread for processing.
is POSIX Message queue designed for such a purpose?
POSIX message queues are absolutely the wrong tool for that.
All you actually need, is a buffer, a couple of counters or pointers, a mutex, and a couple of condition variables:
/* Mutex taken by the buffer functions before they touch the state below. */
static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;
/* Signalled by buffer_add() after it stores data; buffer_get() waits on it. */
static pthread_cond_t buffer_more = PTHREAD_COND_INITIALIZER;
/* Signalled by buffer_get() after it removes data, and broadcast by
buffer_resize(); buffer_add() waits on it when the buffer is full. */
static pthread_cond_t buffer_room = PTHREAD_COND_INITIALIZER;
/* Pointer and counters are volatile, since different threads may change them
whenever they hold the buffer_lock. */
static double * volatile buffer_data = NULL;
static volatile size_t buffer_size = 0; /* Capacity of buffer_data, in elements */
static volatile size_t buffer_next = 0; /* First/next buffered value */
static volatile size_t buffer_ends = 0; /* Index of the first unused element (an element index, not a byte offset) */
/* Optional flag to indicate no more data to be produced or consumed */
static volatile int buffer_done = 0;
/* Slide any unread values down to index 0 so that all free space ends up at
   the tail of the buffer. The caller must already hold buffer_lock. */
static inline void buffer_repack_locked(void)
{
    if (buffer_ends <= buffer_next) {
        /* Nothing is buffered: just rewind both cursors. */
        buffer_next = 0;
        buffer_ends = 0;
        return;
    }

    if (buffer_next == 0)
        return; /* Data already starts at the front. */

    const size_t pending = buffer_ends - buffer_next;

    memmove(buffer_data, buffer_data + buffer_next,
            pending * sizeof buffer_data[0]);
    buffer_ends = pending;
    buffer_next = 0;
}
To grow the buffer (at any point), you use
/* Resize the shared buffer to hold new_size elements.
 *
 * Buffered data is never discarded: if new_size is smaller than the amount
 * of data currently buffered, the buffer is only shrunk to that amount.
 *
 * Returns 0 on success, or -1 if the request cannot be satisfied (the
 * existing buffer and its contents are then left untouched).
 */
static int buffer_resize(size_t new_size)
{
    int result = -1;

    pthread_mutex_lock(&buffer_lock);

    /* First, repack the buffer to start of the area. */
    buffer_repack_locked();

    /* Do not lose any data, however. */
    if (new_size < buffer_ends)
        new_size = buffer_ends;

    if (new_size == 0) {
        /* realloc() with a zero size may free the block and return NULL,
           which would look like a failure while leaving buffer_data
           dangling. Release the storage explicitly instead. */
        free(buffer_data);
        buffer_data = NULL;
        buffer_size = 0;
        result = 0;
    } else if (new_size <= (size_t)-1 / sizeof buffer_data[0]) {
        /* The byte count cannot overflow; reallocate through a temporary
           so the old block survives a failed realloc(). */
        void *new_data = realloc(buffer_data, new_size * sizeof buffer_data[0]);
        if (new_data) {
            buffer_data = new_data;
            buffer_size = new_size;
            result = 0;
        }
    }

    /* Wake up any waiters waiting on room in the buffer, just to be sure. */
    if (result == 0)
        pthread_cond_broadcast(&buffer_room);

    pthread_mutex_unlock(&buffer_lock);
    return result;
}
Producer or producers add a block of data to the buffer using
/* Copy count values from data into the shared buffer, blocking on
   buffer_room whenever the buffer is full. Returns once every value has
   been stored. */
static void buffer_add(const double *data, size_t count)
{
    pthread_mutex_lock(&buffer_lock);
    buffer_repack_locked();

    while (count > 0) {
        if (buffer_ends < buffer_size) {
            /* Store as much as fits in the free tail. */
            size_t chunk = buffer_size - buffer_ends;
            if (chunk > count)
                chunk = count;

            memmove(buffer_data + buffer_ends, data,
                    chunk * sizeof buffer_data[0]);
            buffer_ends += chunk;

            /* Let one waiting consumer know there is data. */
            pthread_cond_signal(&buffer_more);

            data += chunk;
            count -= chunk;
        } else {
            /* Full: wait until a consumer frees room, then compact. */
            pthread_cond_wait(&buffer_room, &buffer_lock);
            buffer_repack_locked();
        }
    }

    pthread_mutex_unlock(&buffer_lock);
}
Similarly, consumers get data from the buffer using
/* Fetch between min_size and max_size values from the shared buffer.
 *
 * data      destination array with room for at least max_size values
 * min_size  keep waiting until at least this many values were copied
 * max_size  never copy more than this many values
 *
 * If the two limits are passed in the wrong order they are swapped.
 * Blocks on buffer_more while the buffer is empty. When buffer_done is
 * observed set, the call returns immediately with whatever has been
 * copied so far (possibly fewer than min_size values).
 *
 * Returns the number of values stored in data.
 */
static size_t buffer_get(double *data, size_t min_size, size_t max_size)
{
    size_t size, have = 0;

    /* Make sure min and max size are in the right order. */
    if (max_size < min_size) {
        size = max_size;
        max_size = min_size;
        min_size = size;
    }

    pthread_mutex_lock(&buffer_lock);

    while (1) {
        /* No more data incoming? */
        if (buffer_done)
            break;

        /* Buffer empty? */
        if (buffer_next >= buffer_ends) {
            pthread_cond_wait(&buffer_more, &buffer_lock);
            continue;
        }

        /* How much can we grab? */
        size = buffer_ends - buffer_next;
        if (size > max_size - have)
            size = max_size - have;

        /* Append after what earlier iterations already delivered. */
        memmove(data + have, buffer_data + buffer_next,
                size * sizeof buffer_data[0]);
        buffer_next += size;
        have += size;

        /* Wake up a waiter for empty room in the buffer. */
        pthread_cond_signal(&buffer_room);

        /* Enough data to return? */
        if (have >= min_size)
            break;
    }

    /* Every exit path must release the lock taken above. */
    pthread_mutex_unlock(&buffer_lock);
    return have;
}
While this does copy the data around quite a bit, it allows both producers and consumers to work on their own data in any size "chunks" they wish.
If your producers and consumers work on matrices, or other "packetized" data of some maximum size, it makes sense to use singly-linked lists of preallocated packets of data, and not a linear buffer:
/* One preallocated packet of doubles; packets are chained through 'next'. */
struct data_packet {
struct data_packet *next;
size_t size; /* Maximum size of data */
size_t used; /* Or rows, cols if a matrix */
double data[]; /* Flexible array member; its storage is sized when the packet is allocated */
};
/* Shared state of a packet queue: one mutex, two condition variables and
   two singly-linked packet lists. */
struct data_queue {
pthread_mutex_t lock; /* Taken by the queue functions around accesses to the lists and counters */
pthread_cond_t more; /* Signalled when a packet is appended to 'queue' */
pthread_cond_t room; /* Signalled when a packet is returned to 'unused' */
struct data_packet *queue; /* Filled packets awaiting a consumer, oldest first */
struct data_packet *unused; /* Empty packets available to producers */
unsigned long produced; /* Optional, just information */
unsigned long consumed; /* Optional, just information */
volatile int done; /* Set when no more to be produced */
};
/* Release every packet of the chain starting at root. Each packet's link
   and size are cleared just before it is freed. */
static void free_data_packets(struct data_packet *root)
{
    struct data_packet *following;

    for (; root != NULL; root = following) {
        following = root->next;
        root->next = NULL;
        root->size = 0;
        free(root);
    }
}
To initialize a data queue, we also need to generate some empty packets in it. This must be done before any threads start working with the queue:
/* Initialize queue q and stock it with up to count empty packets, each
   able to hold size doubles.
   Returns how many packets were actually created, which may be fewer than
   count if memory runs out part-way. Returns 0 with errno set on error:
   EINVAL for a NULL queue or a zero count, ENOMEM if no packet at all
   could be allocated.
*/
size_t data_queue_init(struct data_queue *q,
                       const size_t size,
                       const size_t count)
{
    if (q == NULL) {
        errno = EINVAL;
        return 0;
    }

    pthread_mutex_init(&q->lock, NULL);
    pthread_cond_init(&q->more, NULL);
    pthread_cond_init(&q->room, NULL);
    q->queue = NULL;
    q->unused = NULL;
    q->produced = 0;
    q->consumed = 0;
    q->done = 0;

    /* A queue without any packets is of no use. */
    if (count == 0) {
        errno = EINVAL;
        return 0;
    }

    /* Build the chain by pushing each new packet onto its front. */
    struct data_packet *chain = NULL;
    size_t made;

    for (made = 0; made < count; made++) {
        struct data_packet *pkt = malloc(sizeof *pkt
                                         + size * sizeof pkt->data[0]);
        if (pkt == NULL)
            break;
        pkt->next = chain;
        pkt->size = size;
        pkt->used = 0;
        chain = pkt;
    }

    if (made == 0) {
        errno = ENOMEM;
        return 0;
    }

    /* Hand the finished chain over to the queue. */
    q->unused = chain;
    return made;
}
Producers grab a free packet from the data queue:
/* Take one empty packet off the unused list, waiting on 'room' while the
   list is empty. Returns NULL with errno == EINVAL for a NULL queue, or
   NULL with errno == 0 once the done flag is set. */
struct data_packet *data_queue_get_unused(struct data_queue *q)
{
    struct data_packet *pkt = NULL;

    if (q == NULL) {
        errno = EINVAL;
        return NULL;
    }

    pthread_mutex_lock(&q->lock);

    for (;;) {
        if (q->done)
            break;              /* pkt is still NULL here */
        pkt = q->unused;
        if (pkt != NULL)
            break;
        /* Nothing free right now; sleep until a packet is returned. */
        pthread_cond_wait(&q->room, &q->lock);
    }

    if (pkt != NULL) {
        /* Unlink the packet and hand it out in a cleared state. */
        q->unused = pkt->next;
        pkt->next = NULL;
        pkt->used = 0;
        pthread_mutex_unlock(&q->lock);
        return pkt;
    }

    /* The done flag ended the wait. */
    pthread_mutex_unlock(&q->lock);
    errno = 0;
    return NULL;
}
The above may return NULL, when an error occurs (errno will be set to a nonzero error), or when the done flag is set (errno will be zero).
The producer must remember to set the used field to reflect the amount of data it produced in the packet. (It must not exceed size, though.)
The producer can work on the data packet as they wish; it is their "own", and no locking is needed.
When the producer has completed the packet, they append it to the data queue:
/* Add filled packet p to the tail of q's data queue and wake one consumer.
   Returns 0 on success, or -1 with errno == EINVAL if q or p is NULL. */
int data_queue_append(struct data_queue *q, struct data_packet *p)
{
    struct data_packet **link;

    if (q == NULL || p == NULL) {
        errno = EINVAL;
        return -1;
    }

    p->next = NULL;

    pthread_mutex_lock(&q->lock);

    /* Walk to the link that terminates the list and hang p there. */
    link = &q->queue;
    while (*link != NULL)
        link = &(*link)->next;
    *link = p;

    q->produced++;

    /* One more packet is ready for a waiting consumer. */
    pthread_cond_signal(&q->more);

    pthread_mutex_unlock(&q->lock);
    return 0;
}
Similarly, a consumer grabs the next packet from the queue,
/* Remove and return the oldest filled packet, waiting on 'more' while the
   queue is empty. Returns NULL with errno == EINVAL for a NULL queue, or
   NULL with errno == 0 when the queue is empty and the done flag is set. */
struct data_packet *data_queue_get(struct data_queue *q)
{
    struct data_packet *head;

    if (q == NULL) {
        errno = EINVAL;
        return NULL;
    }

    pthread_mutex_lock(&q->lock);

    while (q->queue == NULL) {
        /* The done flag only matters once the queue has run dry. */
        if (q->done) {
            pthread_mutex_unlock(&q->lock);
            errno = 0;
            return NULL;
        }
        pthread_cond_wait(&q->more, &q->lock);
    }

    /* Unlink the head packet. */
    head = q->queue;
    q->queue = head->next;
    head->next = NULL;
    q->consumed++;

    pthread_mutex_unlock(&q->lock);
    return head;
}
and freely works on it. Note that the above does not examine the done flag unless the queue is empty.
When it has completed the work on the packet, it returns it to the unused queue:
/* Give packet p back to q's unused list and wake one waiting producer.
   Returns 0 on success, or -1 with errno == EINVAL if q or p is NULL. */
int data_queue_append_unused(struct data_queue *q, struct data_packet *p)
{
    if (q == NULL || p == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* The packet goes back empty. */
    p->used = 0;

    pthread_mutex_lock(&q->lock);

    /* Push onto the front of the unused list. */
    p->next = q->unused;
    q->unused = p;

    /* A free packet is available again. */
    pthread_cond_signal(&q->room);

    pthread_mutex_unlock(&q->lock);
    return 0;
}
This approach allows one or more consumers and one or more producers to work on their own packets at their own pace, without using any locks et cetera, and without copying the data itself around. However, the packet size and number of packets concurrently being worked on are limited.
The queue must be initialized with unused packet count at least the total number of producers and consumers; I prefer about twice that, to maximize throughput when the time taken by each varies a bit. The above, however, does allow removal of empty packets from the unused queue, and/or appending new empty packets to the unused queue, at any point in time. (When appending, remember to signal on the data queue room condition variable.)
Finally, note that the produced and consumed counts refer to the queue itself. If you want consumed to reflect the number of packets already consumed, you can move the q->consumed++ from data_queue_get() to data_queue_append_unused() instead.
It will work, but be aware that the absolute maximum message size is 16 MB (HARD_MSGSIZEMAX) since Linux 3.5, and was 1 MB before that. The default message size limit is only 8 KB though, so you need to set it when you call mq_open() or your 5000 doubles won't fit in one message.
A message queue is meant to transfer data between processes. Since threads are a part of the same process, there is no need to send data first to the kernel and then receive it back. In case of threads, all the global data is visible to all threads. Signalling mechanism like mutex and condition variables are required to synchronize the availability of data between threads.
I'm studying Tom Forsyth's Linear-Speed Vertex Cache Optimization and i don't understand how he calculates the ACMR. From what i have read i already know that ACMR = number of cache misses / number of triangles, but what i don't understand is what kind of cache is being used (i.e. FIFO or LRU?).
I have written a test program that calculates and prints the ACMR of a given 3d model using a FIFO cache, can you please tell me if this code is ok? or should i use an LRU cache instead?
/* the number of entries in a FIFO cache */
#define FIFO_CACHE_SIZE 32
/* Fixed-size FIFO of vertex indices. add_entry() stores the newest entry in
   entries[0]; init_cache() fills every slot with -1 to mark it unused. */
struct fifo_cache {
long entries[FIFO_CACHE_SIZE];
};
/**
 * init_cache - puts a FIFO cache into its empty state
 * #cache: A pointer to the FIFO cache structure to be initialized.
 *
 * Must be called before the cache is used. Every slot is set to -1, a
 * value that check_entry() can never be asked to look up.
 */
static void init_cache(struct fifo_cache *cache)
{
    long *slot = cache->entries;
    long *const end = slot + FIFO_CACHE_SIZE;

    while (slot != end)
        *slot++ = -1;
}
/**
 * check_entry - tests whether an entry is currently held in the cache
 * #cache: A pointer to the FIFO cache structure to be searched.
 * #entry: An entry to be searched for.
 *
 * Return: 1 if some slot holds the entry, 0 otherwise.
 */
static int check_entry(const struct fifo_cache *cache, u16 entry)
{
    const long wanted = (long)entry;
    const long *slot = cache->entries;
    const long *const end = slot + FIFO_CACHE_SIZE;

    for (; slot != end; slot++) {
        if (*slot == wanted)
            return 1;
    }
    return 0;
}
/**
 * add_entry - pushes a new entry into the FIFO cache
 * #cache: A pointer to the FIFO cache structure the entry will be added to.
 * #entry: An entry to add.
 *
 * Every existing entry moves one slot towards the end, the entry in the
 * last slot is dropped, and the new entry is stored in slot 0.
 */
static void add_entry(struct fifo_cache *cache, u16 entry)
{
    int i;

    /* Shift from the back so nothing is overwritten before it is copied. */
    for (i = FIFO_CACHE_SIZE - 1; i > 0; i--)
        cache->entries[i] = cache->entries[i - 1];

    cache->entries[0] = (long)entry;
}
/**
 * calculate_acmr - calculates the average cache miss ratio (aka. ACMR)
 * #indices: The list of vertex indices.
 * #count: The number of vertex indices in the #indices list.
 *
 * Runs every index through a FIFO cache of FIFO_CACHE_SIZE entries and
 * counts the misses. The triangle count is count / 3 (integer division).
 *
 * Return: misses divided by triangles, or 0 when #indices is NULL or the
 * list holds fewer than three indices (no complete triangle, so the ratio
 * is undefined and the division would be by zero).
 */
float calculate_acmr(const u16 *indices, size_t count)
{
    struct fifo_cache cache;
    const size_t triangles = count / 3;
    long total = 0;     /* the total number of cache misses */
    size_t i;           /* same type as count: no signed/unsigned compare */

    if (indices == NULL || triangles == 0)
        return 0.0f;

    /* initialize the cache */
    init_cache(&cache);

    for (i = 0; i < count; i++) {
        if (!check_entry(&cache, indices[i])) {
            /* an entry doesn't exist in the cache, so add it */
            add_entry(&cache, indices[i]);
            total++;
        }
    }

    return (float)total / (float)triangles;
}
I found the answer. Modern GPUs use FIFO caches for simplicity and speed, so it makes sense to calculate the ACMR using a FIFO cache. The code given above is correct, so I'll keep using that.
You are correct that is the way hardware does it. Additionally you may want to read this: http://www.realtimerendering.com/blog/acmr-and-atvr/
I am trying to write a function to clean up the hash table that is generated by this code
/*
* Markov chain random text generator.
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "eprintf.h"
/* Compile-time parameters of the generator. */
enum {
NPREF = 2, /* number of prefix words */
NHASH = 4093, /* size of state hash table array */
MAXGEN = 10000 /* maximum words generated */
};
typedef struct State State;
typedef struct Suffix Suffix;
/* One hash-table entry: a prefix of NPREF words and the words seen after it. */
struct State { /* prefix + suffix list */
char* pref[NPREF]; /* prefix words */
Suffix* suf; /* list of suffixes */
State* next; /* next in hash table */
};
/* One node in a State's singly-linked list of following words. */
struct Suffix { /* list of suffixes */
char* word; /* suffix */
Suffix* next; /* next in list of suffixes */
};
State* lookup(char *prefix[], int create);
void build(char *prefix[], FILE*);
void generate(int nwords);
void add(char *prefix[], char *word);
/* Each slot heads a chain of State nodes linked through State.next. */
State* statetab[NHASH]; /* hash table of states */
/* Marker word; it is a static array, not heap memory. */
char NONWORD[] = "\n"; /* cannot appear as real word */
/* markov main: seed the random generator, build the state table from
   standard input, then print up to MAXGEN generated words */
int main(void)
{
    char *prefix[NPREF];    /* current input prefix */
    int nwords = MAXGEN;
    long seed;
    int i;

    setProgName("markov");

    seed = time(NULL);
    srand(seed);

    /* every prefix slot starts out as the NONWORD marker */
    i = 0;
    while (i < NPREF) {
        prefix[i] = NONWORD;
        i++;
    }

    build(prefix, stdin);
    add(prefix, NONWORD);   /* terminate the input with the marker */
    generate(nwords);

    return 0;
}
const int MULTIPLIER = 31; /* for hash() */

/* hash: fold the bytes of all NPREF strings into a single value and
   reduce it to a table index in [0, NHASH) */
unsigned int hash(char* s[NPREF])
{
    unsigned int h = 0;
    int i;

    for (i = 0; i < NPREF; i++) {
        const unsigned char *p = (unsigned char *) s[i];

        while (*p != '\0') {
            h = MULTIPLIER * h + *p;
            p++;
        }
    }
    return h % NHASH;
}
/* lookup: find the state whose prefix equals the given words. */
/* When it is missing and create is nonzero, a new state is inserted at */
/* the head of its hash chain and returned; otherwise NULL is returned. */
/* The prefix strings are stored by pointer, not copied, so they must */
/* stay unchanged afterwards. */
State* lookup(char *prefix[NPREF], int create)
{
    const int bucket = hash(prefix);
    State *sp;
    int k;

    for (sp = statetab[bucket]; sp != NULL; sp = sp->next) {
        k = 0;
        while (k < NPREF && strcmp(prefix[k], sp->pref[k]) == 0)
            k++;
        if (k == NPREF)     /* all words matched */
            return sp;
    }

    if (!create)
        return NULL;

    sp = (State *) emalloc(sizeof(State));
    for (k = 0; k < NPREF; k++)
        sp->pref[k] = prefix[k];
    sp->suf = NULL;
    sp->next = statetab[bucket];
    statetab[bucket] = sp;
    return sp;
}
/* addsuffix: push a new suffix node onto the front of sp's list. The word
   is stored by pointer, so it must not change later */
void addsuffix(State *sp, char *suffix)
{
    Suffix *node = (Suffix *) emalloc(sizeof(Suffix));

    node->word = suffix;
    node->next = sp->suf;
    sp->suf = node;
}
/* add: record suffix as a follower of the current prefix, then advance
   the prefix window by one word */
void add(char *prefix[NPREF], char *suffix)
{
    int k;

    /* the state is created on first sight of this prefix */
    addsuffix(lookup(prefix, 1), suffix);

    /* drop the oldest word and append the new one */
    for (k = 0; k + 1 < NPREF; k++)
        prefix[k] = prefix[k + 1];
    prefix[NPREF-1] = suffix;
}
/* build: read whitespace-separated words from f and add each one to the
   prefix table. A heap copy of every word is handed to add(), which keeps
   the pointer. Words longer than the buffer are split by the field width. */
void build(char *prefix[NPREF], FILE *f)
{
    char buf[100], fmt[10];

    /* create a format string; %s could overflow buf.
       sizeof yields a size_t, so convert it to match %d; snprintf also
       keeps the write within fmt. */
    snprintf(fmt, sizeof fmt, "%%%ds", (int)(sizeof(buf) - 1));
    while (fscanf(f, fmt, buf) != EOF)
        add(prefix, estrdup(buf));
}
/* generate: starting from the all-NONWORD prefix, repeatedly pick a random
   suffix of the current prefix and print it, one word per line. Stops
   after nwords words or when the NONWORD marker is picked */
void generate(int nwords)
{
    char *prefix[NPREF], *w;
    int i, k;

    for (k = 0; k < NPREF; k++)     /* reset initial prefix */
        prefix[k] = NONWORD;

    for (i = 0; i < nwords; i++) {
        State *sp = lookup(prefix, 0);
        Suffix *suf;
        int nmatch = 0;

        if (sp == NULL)
            eprintf("internal error: lookup failed");

        /* the n-th suffix replaces the choice with probability 1/n */
        suf = sp->suf;
        while (suf != NULL) {
            nmatch++;
            if (rand() % nmatch == 0)
                w = suf->word;
            suf = suf->next;
        }
        if (nmatch == 0)
            eprintf("internal error: no suffix %d %s", i, prefix[0]);

        if (strcmp(w, NONWORD) == 0)
            break;
        printf("%s\n", w);

        /* slide the prefix window forward by one word */
        for (k = 0; k + 1 < NPREF; k++)
            prefix[k] = prefix[k + 1];
        prefix[NPREF-1] = w;
    }
}
Here is what I have so far for my clean function
/*Clean Function*/
/* Attempt to free every suffix list in statetab. Kept as posted; the
   comments below mark the defects.
   The value passed in sp is never read: sp is reassigned in the inner
   for loop before its first use. */
void clean_up(State *sp)
{
State *temp; /* never used in this function */
Suffix *temp2, temp3; /* temp3 is a Suffix object with automatic storage, not a pointer */
for(int h = 0; h < NHASH; h++)
{
for (sp = statetab[h]; sp != NULL; sp = sp->next)
{
while(sp->suf != NULL)
{
temp2= sp->suf;
/* BUG: copies the following node by value, and dereferences a NULL
   pointer when temp2 is the last node of the list. */
temp3= *temp2->next;
free(temp2);
/* BUG: stores the address of the local temp3. The next iteration passes
   that address to free(), and sp->suf never becomes NULL. */
sp->suf= &temp3;
}
}
}
}
I think im on the right track, I'm going through each index in the hash table, then going from state to state and freeing the suffixes. I'm not sure what to do about the prefixes, because I have to free them before I can free each state. Any help would be greatly appreciated.
In your code, you are copying into a temp3 node, which lives in automatic memory ("on the stack") pointing sp->suf to this memory will (on the next iteration of the loop) cause free to be called with the address of this object (which has not been obtained by malloc, and thus cannot be freed by free() )
/* clean_up: release every State and Suffix node reachable from statetab
 * and reset each table slot to NULL.
 *
 * The parameter is kept for compatibility; its incoming value is ignored
 * and it only serves as the chain cursor.
 *
 * The word strings referenced by State.pref[] and Suffix.word are NOT
 * freed here: the same pointer is stored in several nodes (a word is used
 * as a suffix and then as part of later prefixes), and NONWORD is a static
 * array, so freeing them per node would free memory twice or free memory
 * that was never allocated.
 */
void clean_up(State *sp)
{
    for (int h = 0; h < NHASH; h++)
    {
        sp = statetab[h];
        while (sp != NULL)
        {
            /* Save the links before the nodes that hold them are freed. */
            State *next_state = sp->next;
            Suffix *suf = sp->suf;

            while (suf != NULL)
            {
                Suffix *next_suf = suf->next;
                free(suf);
                suf = next_suf;
            }
            free(sp);
            sp = next_state;
        }
        /* Leave no dangling pointer behind in the table. */
        statetab[h] = NULL;
    }
}
The example code is derived from the Markov program in The Practice of Programming by Kernighan and Pike, a most excellent book.
Given that you are trying to clean up the statetab, the main clean-up function doesn't need any argument. You do have to be careful not to free the states directly in statetab, but you do need to release auxiliary states chained off statetab[i].next.
/* Declarations repeated from the Markov program so that the clean-up code
   below can be read on its own. */
typedef struct State State;
typedef struct Suffix Suffix;
struct State { /* prefix + suffix list */
char* pref[NPREF]; /* prefix words */
Suffix* suf; /* list of suffixes */
State* next; /* next in hash table */
};
struct Suffix { /* list of suffixes */
char* word; /* suffix */
Suffix* next; /* next in list of suffixes */
};
State* statetab[NHASH]; /* hash table of states */
/* Forward declarations: cleanup() uses free_state() before its definition. */
static void free_state(State *state);
static void free_suffix(Suffix *suffix);
static void cleanup(void)
{
for (int i = 0; i < NHASH; i++)
free_state(statetab[i]);
}
/* Free what a state owns: its prefix strings, its suffix list and,
   recursively, every state chained after it. The node passed in is not
   itself freed; only its successors are. A null state is ignored. */
static void free_state(State *state)
{
    if (state == 0)
        return;

    for (int k = 0; k != NPREF; k++)
        free(state->pref[k]);

    free_suffix(state->suf);

    State *rest = state->next;
    if (rest == 0)
        return;

    free_state(rest);
    free(rest);
}
/* Free a whole suffix list: each node's word, then the node itself.
   A null list is ignored. */
static void free_suffix(Suffix *suffix)
{
    while (suffix != 0)
    {
        Suffix *following = suffix->next;

        free(suffix->word);
        free(suffix);
        suffix = following;
    }
}
Do you see how I've designed the free_xxxx() code based on the design of the xxxx structure?
Caveat Lector: uncompiled code, much less tested code.
I dug up the code from the TPOP site, and tried to apply it. I made some fixes to the freeing code above (syntax error fixed, the null checks in free_state() and free_suffix()), but the code as a whole was not designed to allow the data to be freed.
There are a couple of problems. First, a few of the prefixes are not allocated (NONWORD). It might be possible to avoid releasing those by testing whether a prefix is NONWORD, but that's nasty. It might be possible to allocate those prefixes too (replace NONWORD by estrdup(NONWORD)). I think there's another place, somewhere, that a non-allocated pointer is being stashed in a prefix in the state table; I'm getting crashes in malloc() complaining of 'freeing non-allocated memory' (which is distinct from 'double freeing allocated memory', I believe), but I've not managed to resolve that.
However, that then changes to another problem; the prefixes are reused. That is, almost every prefix in the system is used as the second word of one prefix, then as the first word of the next prefix. Thus, you can't readily free the prefixes.
If you were to design this so that the memory could be released, then you'd probably design it so that there was a system of 'atoms' (immutable strings) such that each word was allocated once and reused as often as necessary (see C Interfaces and Implementations: Techniques for Creating Reusable Code by D Hanson for the source of the term). The code freeing the state table would then concentrate only on the non-word data. There'd be code to release the complete set of atoms as well.
I ran the Markov program under valgrind without the cleanup; there are no memory access problems and no leaked data; it is all still accessible at program exit. I was using a data file of about 15,000 words (and about 2900 distinct words), and the statistics were:
==9610== HEAP SUMMARY:
==9610== in use at exit: 695,269 bytes in 39,567 blocks
==9610== total heap usage: 39,567 allocs, 0 frees, 695,269 bytes allocated
==9610==
==9610== LEAK SUMMARY:
==9610== definitely lost: 0 bytes in 0 blocks
==9610== indirectly lost: 0 bytes in 0 blocks
==9610== possibly lost: 0 bytes in 0 blocks
==9610== still reachable: 695,269 bytes in 39,567 blocks
So, you set yourself an interesting exercise. However, I think it is not achievable without reworking some of the memory allocation mechanism so that the data can be freed cleanly.
(On BSD, and hence on Mac OS X too, there are a pair of functions in <stdlib.h> called setprogname() and getprogname(). On BSD, setprogname() is called automatically before the main() gets going (with argv[0], I believe). The declaration in eprintf.h conflicts with the declaration in <stdlib.h>, which may be why the code in the question uses setProgName() instead of the original setprogname(). I chose to fix setprogname() in eprintf.h so that it took a const char * argument and therefore matched the declaration in <stdlib.h>.)
TPOP was previously at
http://plan9.bell-labs.com/cm/cs/tpop and
http://cm.bell-labs.com/cm/cs/tpop but both are now (2015-08-10) broken.
See also Wikipedia on TPOP.
I am trying to tokenize a string. I have a table of available tokens ordered in the form of a trie. Each token knows it has children. A simple tokens table will look like,
pattern value has_children
-------- ------ --------
s s-val 1
stack stack-val 0
over over-val 1
overflow overflow-val 0
In this table, stack is a child of s and overflow is a child of over. In practice, this table will have 5000+ records ordered in this way.
Now, given a string stackover, it should output stack-valover-val. Algorithm is greedy and it will try to find the longest match always.
To do this, I will start reading each character from the input, look for match, if a match found and the token has children, look for match again by including next character. Do this until we find the longest match. If no match found, try to match by including the next character until we reach the end of string or a successful match.
If we reached end of the string without a match, output ? symbol and remove the first character from the input. Repeat the whole process with remaining characters.
This algorithm works, but the backtracking and iterating on all possible combinations of the input makes it slow and complex.
I am wondering is there a better way of solving this? Any help would be appreciated.
Instead of backtracking you could keep in memory all possible results, until one result singles out at certain point in input stream. Example
Tokens: S STACK STACKOVERFLOW STAG OVER OVERFLOW
String: SSTACKOVERFUN
1 - Found S on place 0, have tokens that begin with S, try them all, only S is valid, so resolve S
2 - S on 1, have such tokens, try them, possible valid are S and STACK. Don't resolve, just keep them in mind.
3 - T on 2, have no such tokens, so S could be resolved now, but we also have longer token (STACK) so S is no good. Ditch S, and STACK is only left, but it has children. Try string for children. There are no possible children so resolve STACK
4 - O on 6, have such tokens, try them, have only OVER, so resolve OVER
5 - F on 10, no such tokens, and nothing to resolve from before so this is non-tokenizable
6 and 7 - same as step 5
Final result: S STACK OVER fun
Could you use the Aho-Corasick algorithm? It creates an automaton to search a keyword tree (trie).
I'm thinking that you want to take all of your keywords and sort them reverse alphabetically, so your list would become (plus a few extras)
0 stack 1
1 s 0
2 overflow 3
3 over 5
4 ovum 5
5 o 0
6 exchange 7
7 ex 0
The third column of this list are pointers to the parent token which is always lower on the list. Then you can take your target string and binary search where it fits on this list. If it lands above a token which matches then you clip off that portion and repeat the process for the remainder. If it doesn't match you use the parent pointer to find the next longest potential matching token.
If you want to get really fancy you can also chunk up the strings into 64bit words and compare 8 characters at once in the binary search.
I suggest you try Ragel, It can generate efficient scanners that can do longest match/backtracking. See chapter 6.3 in the Ragel user guide for more information.
I've created a tiny test which I think matches your specification, this is only the state machine description, without the code to feed input:
%%{
machine test;
# Scanner definition: one pattern per line, each with the action that runs
# when that pattern is the one selected.
# NOTE(review): presumably the longest match wins, as the text describes; confirm against the Ragel guide.
main := |*
's' => { puts("s-val");};
'stack' => { puts("stack-val");};
'over' => { puts("over-val");};
'overflow' => { puts("overflow-val");};
# Anything else matches to any, outputs a '?' and continues
any => {putc('?');};
*|;
}%%
The following token_tree code is based on the prefix_tree class from ZeroMQ
The prefix_tree class only returns "true" when one of the tree's prefixes matches the start of the input text. It will not even tell you which prefix or how long that prefix was.
This token_tree will look for the longest token that matches the start of the input text. The search
function token_tree_longest_token() only needs to return the length of the longest token matched
against the start of the input text.
The basic algorithm is similar to the one described in the question, but its implementation might be faster.
Also there are some ways to improve memory usage, which could make it faster.
#include <stdint.h>
#include <stdlib.h>
/* #define TEST_TOKEN_TREE */
/*
* TODO: possible improvements, use multiple types of nodes: string/branch/leaf.
* The string node would replace a chain of normal token_nodes and save memory.
* This would require spliting a node to add branch points.
* Use these structs:
* struct token_node {
* uint32_t ref_count;
* uint8_t node_type; -- node is token_node_str/token_node_branch/token_node_leaf
* };
* struct token_node_str {
* token_node base;
* uint8_t reserved;
* uint16_t len; -- string length
* token_node *child; -- string nodes can only have one child.
* uint8_t str[0]; -- embedded string (not null-terminated)
* };
* struct token_node_branch {
* token_node base;
* uint8_t min; -- smallest char in child list.
* uint16_t count; -- child count.
* token_node *children[0];
* };
* struct token_node_leaf { -- leaf nodes have no children.
* token_node base;
* };
* This will save memory, but will make code much more complex.
*/
typedef struct token_tree token_tree;
typedef struct token_node token_node;

/** Handle for a token tree; owns the root node. */
struct token_tree {
    token_node *root;       /**< root node of token tree. */
};

/**
 * One tree node. The children array is allocated together with the node
 * and holds 'count' slots; the slot for character c is children[c - min].
 */
struct token_node {
    uint32_t ref_count;     /**< how many token references end at this node. */
    uint8_t min;            /**< smallest 'char' in children's list. */
    uint8_t reserved;       /**< padding. */
    uint16_t count;         /**< number of children. (max count = 256, so count must be 16bits) */
    token_node *children[]; /**< list of children nodes. index by (c - min) */
};

/*
 * Bytes needed for a node with 'count' child slots. The argument is
 * parenthesized so that an expression such as NODE_SIZE(n + 1) is
 * multiplied as a whole.
 */
#define NODE_SIZE(count) (sizeof(token_node) + (sizeof(token_node *) * (count)))
/*
 * Allocate a zero-filled node with room for 'count' child slots.
 * Returns the new node, or NULL if the allocation fails.
 */
static token_node *token_node_new(uint16_t count) {
    token_node *node = calloc(1, NODE_SIZE(count));

    /* do not write through a failed allocation. */
    if(node == NULL) return NULL;
    node->count = count;
    return node;
}
/*
 * Create a straight chain of new nodes for the 'len' bytes at 'token',
 * storing the first node in *pnode. Each node has exactly one child except
 * the last, which has none and is marked as the end of a token.
 */
static void token_node_build_chain(token_node **pnode, const uint8_t *token, size_t len) {
    token_node *node;

    for(;;) {
        /* only the final node of the chain is childless. */
        node = token_node_new(len ? 1 : 0);
        /* hook the node into the slot of its parent. */
        *pnode = node;
        if(!len) break;
        /* the single child slot is keyed by the current char. */
        node->min = *token++;
        node->count = 1;
        len--;
        /* the next node goes into that child slot. */
        pnode = &node->children[0];
    }
    /* a token ends at the last node. */
    node->ref_count++;
}
/* Free 'node' together with every node below it. 'node' must not be NULL. */
static void token_node_free(token_node *node) {
    token_node **slot = node->children;
    token_node **const end = slot + node->count;

    /* release each populated child slot first. */
    for(; slot != end; slot++) {
        if(*slot) token_node_free(*slot);
    }
    free(node);
}
/*
 * Make sure the children list of *pnode has a slot for char 'c',
 * reallocating the node when the list has to grow. *pnode is updated
 * because realloc() may move the node. New slots are set to NULL.
 */
static void token_node_grow(token_node **pnode, uint8_t c) {
token_node *node = *pnode;
token_node **children;
uint8_t old_min = node->min;
uint16_t old_count = node->count;
uint32_t i;
uint8_t min;
uint16_t count;
if(c < old_min) {
/* new char sorts before the current range: extend the list at the front. */
min = c;
count = old_count + (old_min - min);
} else {
if(old_count == 0) {
/* the list was empty, so this is the first char. */
old_min = c;
}
min = old_min;
/* from here on 'c' is an index relative to the start of the list. */
c -= old_min;
if(c < old_count) {
/* don't need to grow. */
return;
}
count = c + 1;
}
/* NOTE(review): the realloc() result is stored and dereferenced without a NULL check. */
node = realloc(node, NODE_SIZE(count));
*pnode = node;
children = node->children;
/* if the 'min' value changed, then we need to move all the old slots up. */
if(old_min != min) {
/* only reached from the (c < old_min) branch, so diff is at least 1. */
uint32_t diff = old_min - min;
/* copy from the top down so no slot is overwritten before it is moved. */
for(i=count-1; i >= diff; i--) {
children[i] = children[i - diff];
}
/* null new slots at start of children list. */
for(i=0; i < diff; i++) {
children[i] = NULL;
}
} else {
/* null new slots at end of children list. */
for(i=old_count; i < count; i++) {
children[i] = NULL;
}
}
node->min = min;
node->count = count;
}
/*
 * Follow existing nodes for as many leading bytes of the token as possible.
 * On return *ptoken and *plen describe the part of the token that was not
 * consumed, and the result points at the slot where the walk stopped: an
 * empty slot, the slot of the node that has no child for the next char, or
 * the slot of the node reached once the whole token was consumed.
 */
static token_node **token_node_find_last_node(token_node **pnode, const uint8_t **ptoken, size_t *plen) {
    const uint8_t *cursor = *ptoken;
    size_t remaining = *plen;
    token_node *node;

    for(node = *pnode; node != NULL && remaining != 0; node = *pnode) {
        /* a char below node->min wraps to a huge index and fails the range test. */
        uint32_t slot = (uint32_t)*cursor - node->min;

        if(slot >= node->count) {
            /* char is left unconsumed; the caller grows this node when adding. */
            break;
        }
        /* consume the char and step down into the child slot. */
        cursor++;
        remaining--;
        pnode = &node->children[slot];
    }

    *ptoken = cursor;
    *plen = remaining;
    return pnode;
}
/*
 * Add the 'len' bytes at 'token' as a token to the tree rooted at *pnode.
 * Adding the same token again only increments the reference count of its
 * end node.
 */
static void token_node_add(token_node **pnode, const uint8_t *token, size_t len) {
    token_node *node;

    /* find last node in chain for this token. */
    pnode = token_node_find_last_node(pnode, &token, &len);
    node = *pnode;

    if(node == NULL) {
        /*
         * The walk ended on an empty child slot. This also happens with the
         * whole token consumed (len == 0) when the last char falls inside a
         * parent's range but its slot was never filled, so the slot must be
         * tested before it is dereferenced. Building the chain from here
         * creates the missing node(s) and marks the end node.
         */
        token_node_build_chain(pnode, token, len);
        return;
    }

    /* if full token was consumed then we found the last node for this token. */
    if(len == 0) {
        node->ref_count++;
        return;
    }

    /* the node exists but has no slot for the next char yet. */
    {
        uint32_t c = *token;
        /* consume char. */
        token++;
        len--;
        /* grow node to make room for new char. */
        token_node_grow(pnode, c);
        node = *pnode; /* token_node_grow() may change the node's pointer. */
        /* get slot for new child. */
        pnode = &(node->children[c - node->min]);
    }

    /* build node chain for un-consumed part of token. */
    token_node_build_chain(pnode, token, len);
}
/*
 * Find the longest token that is a prefix of text.
 *
 * node - root node to start matching from (must not be NULL).
 * text - input bytes.
 * len  - number of bytes available in text.
 *
 * Returns the length of the longest prefix of text whose node has a
 * positive ref_count, or 0 when no such prefix exists.
 */
static size_t token_node_longest_token(token_node *node, const uint8_t *text, size_t len) {
    size_t best = 0;
    size_t pos = 0;

    for (;;) {
        /* A node with references marks the end of a stored token. */
        if (node->ref_count > 0) {
            best = pos;
        }

        /* Input exhausted. */
        if (pos >= len) {
            break;
        }

        /* Bytes below node->min wrap around and fail the range test too. */
        uint32_t slot = (uint32_t)text[pos] - node->min;
        if (slot >= node->count) {
            break;
        }

        /* Consume the byte and descend; an empty slot ends the search. */
        node = node->children[slot];
        pos++;
        if (node == NULL) {
            break;
        }
    }

    return best;
}
/*
 * Allocate a new, empty token tree.
 *
 * Returns the tree, or NULL if memory could not be allocated.  The caller
 * owns the result and releases it with token_tree_free().
 */
extern token_tree *token_tree_new() {
    token_tree *tree = malloc(sizeof *tree);

    if (tree == NULL) {
        return NULL;
    }

    tree->root = token_node_new(0);
    if (tree->root == NULL) {
        /* A tree without a root cannot be searched; fail cleanly instead. */
        free(tree);
        return NULL;
    }

    return tree;
}
/*
 * Release a tree and every node it holds.
 * Passing NULL is a no-op, mirroring free().
 */
extern void token_tree_free(token_tree *tree) {
    if (tree == NULL) {
        return;
    }
    token_node_free(tree->root);
    free(tree);
}
/*
 * Add a token of len bytes to the tree.
 *
 * The bytes are handed to the node layer as unsigned values; the explicit
 * cast makes the char -> uint8_t pointer conversion intentional instead of
 * an implicit incompatible-pointer conversion.
 */
extern void token_tree_add(token_tree *tree, const char *token, size_t len) {
    token_node_add(&(tree->root), (const uint8_t *)token, len);
}
/*
 * Return the length of the longest stored token that is a prefix of the
 * first len bytes of text, or 0 if none matches.
 *
 * The explicit cast makes the char -> uint8_t pointer conversion
 * intentional instead of an implicit incompatible-pointer conversion.
 */
extern size_t token_tree_longest_token(token_tree *tree, const char *text, size_t len) {
    return token_node_longest_token(tree->root, (const uint8_t *)text, len);
}
#ifdef TEST_TOKEN_TREE
#include <stdio.h>
#include <string.h>
/*
 * NULL-terminated list of tokens that main() loads into the tree via
 * add_tokens().  main() also feeds the same list to run_test() as input text.
 */
static const char *test_tokens[] = {
"s",
"stack",
"stackoverflow",
"over",
"overflow",
NULL,
};
/*
 * NULL-terminated list of input texts that main() passes to run_test()
 * to be split into tokens and printed.
 */
static const char *test_input[] = {
"aastackoverasdfasdf",
"stack7777",
"777stack777",
"overstackflow",
NULL,
};
/* Insert every string of a NULL-terminated array into the tree. */
static void add_tokens(token_tree *tree, const char **tokens) {
    const char **cursor;

    for (cursor = tokens; *cursor != NULL; cursor++) {
        token_tree_add(tree, *cursor, strlen(*cursor));
    }
}
/*
 * Print text, then print it split greedily into the longest tokens known
 * to the tree.  Each matched token is shown as <token>; a byte at which no
 * token starts is shown as '?' and skipped.
 */
static void print_tokens(token_tree *tree, const char *text) {
    size_t remaining = strlen(text);
    size_t step;

    printf("input: \"%s\"\n", text);
    printf("tokens: [");
    for (; remaining != 0; text += step, remaining -= step) {
        step = token_tree_longest_token(tree, text, remaining);
        if (step == 0) {
            /* No token starts here: report it and advance by one byte. */
            printf("?");
            step = 1;
            continue;
        }
        printf("<%.*s>", (int)step, text);
    }
    printf("]\n");
}
/* Tokenize and print every string of a NULL-terminated array. */
static void run_test(token_tree *tree, const char **texts) {
    const char **cursor = texts;

    while (*cursor != NULL) {
        print_tokens(tree, *cursor);
        cursor++;
    }
}
/*
 * Test driver: build a tree from test_tokens, tokenize test_input and then
 * test_tokens themselves, and release the tree.  Command-line arguments
 * are not used.
 */
int main(int argc, char *argv[]) {
    token_tree *tree;

    (void)argc;
    (void)argv;

    tree = token_tree_new();
    add_tokens(tree, test_tokens);
    run_test(tree, test_input);
    run_test(tree, test_tokens);
    token_tree_free(tree);
    return 0;
}
#endif
Edited to include short description of what is expected from the code.
#include <sys/file.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
/* Number of page-table entries.  Parenthesized so the macro stays a single
 * value inside larger expressions (e.g. MAX_PAGE * 2). */
#define MAX_PAGE (0xFF + 1)
/* Page table entry, one per virtual page.
** You may need to add your own fields to it. */
typedef struct
{
unsigned short frame; /* physical frame backing this page; start_simulation()
                         and invalidate() store -1 here, which wraps to the
                         largest unsigned short, when no frame is assigned */
unsigned int valid:1; /* set by resolve() once a frame is assigned,
                         cleared by invalidate() */
unsigned int in_mem:1; /* only zeroed by start_simulation() in this file */
unsigned int dirty:1; /* only zeroed by start_simulation() in this file */
unsigned int last_frame; /* not referenced by the code shown in this file */
} pt_entry;
/* List entry for physical frames.
** Entries are linked into circular doubly-linked lists whose head is a
** sentinel entry (see the list set-up in start_simulation()). */
struct list_item
{
unsigned short frame; /* physical frame number; assigned in start_simulation() */
struct list_item *next; /* following entry, or the sentinel at the tail */
struct list_item *prev; /* preceding entry, or the sentinel at the front */
int page_num; /* not assigned by the code shown in this file */
};
/* Handle type used for both a whole list (its sentinel) and a single entry. */
typedef struct list_item *list;
/* Run the whole simulation on an opened trace file. */
void start_simulation(FILE *);
/* Resolve one address reference, allocating a frame on a page fault. */
void resolve(int);
/* Return a frame for a faulting page, from the free list or by eviction. */
unsigned short find_frame(void);
/* Choose a frame from the resident set to be reused. */
unsigned short find_victim(void);
/* Print reference and page-fault statistics. */
void display_stats(void);
/* Append a list entry at the end of the resident set. */
void to_resident_set(list);
/* Release the memory of a list. */
void free_mem(list);
/* Invalidate the page table entry that maps the given frame. */
void invalidate(unsigned short);
/*============================ header ends here ==============================*/
/*#include "lru.h"*/
pt_entry pte[MAX_PAGE]; /* page table, indexed by virtual page number (address >> 8) */
int mem_size; /* physical memory size in page frames; set from argv[2] in main() */
list free_list_head; /* sentinel of the free list (frames not yet handed out) */
list res_set_head; /* sentinel of the resident set; find_victim() takes from the front, to_resident_set() appends at the end */
int total_fault = 0; /* total number of page faults; incremented in resolve() */
int total_ref = 0; /* total number of memory references; incremented in start_simulation() */
/* main program:
** read in parameters, open the input file and start the simulation.
**
** usage: pager file_name memory_size
** memory_size is the number of physical page frames.  It must be a decimal
** integer between 1 and 65535: the simulation needs at least one frame to
** evict from, and frame numbers are stored in an unsigned short.
** Exits with status 1 on a usage error or when the file cannot be opened. */
int main(int argc, char *argv[])
{
    FILE *stream;
    char *end;
    long frames;

    if (argc != 3)
    {
        printf("The format is: pager file_name memory_size.\n");
        exit(1);
    }

    /* strtol() instead of atoi(): detects non-numeric and out-of-range input. */
    frames = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || frames < 1 || frames > 0xFFFF)
    {
        fprintf(stderr, "memory_size must be an integer between 1 and 65535.\n");
        exit(1);
    }
    mem_size = (int)frames;

    printf("File used %s, resident set size %d\n", argv[1], mem_size);
    if ((stream = fopen(argv[1], "r")) == NULL)
    {
        perror("File open failed");
        exit(1);
    }

    start_simulation(stream);
    fclose(stream);
    return 0;
}
/*initialise the page table
** initialise the resident set, and the free list
** in the simulation loop
**16-bit memory addresses representing the program trace are read from the input
**file one by one the virtual address is resolved ie. physical frame for the
**virtual page identified
**the loop exits when it encounters the end of file
** free memory allocated for lists
** display statistics
*/
void start_simulation(FILE * stream)
{
char *addr_buf;
int address;
int i, n;
list new_entry, current;
/* initialise the page table */
for(i=0; i<MAX_PAGE;i++)
{
pte[i].frame = -1;
pte[i].valid = 0;
pte[i].dirty = 0;
pte[i].in_mem = 0;
}
/* initialise the resident set - empty*/
res_set_head = (list)malloc(sizeof(struct list_item));
res_set_head->next = res_set_head;
res_set_head->prev = res_set_head;
/* initialise free list - all physical pages*/
free_list_head = (list)malloc(sizeof(struct list_item));
free_list_head->next = free_list_head;
free_list_head->prev = free_list_head;
current = free_list_head;
for(i=0; i<mem_size;i++)
{
new_entry = (list)malloc(sizeof(struct list_item));
current->next = new_entry;
new_entry->prev = current;
new_entry->next = free_list_head;
new_entry->frame = i;
current = new_entry;
free_list_head->prev = current;
}
/* main simulation loop */
while( (n = fscanf(stream, "%x", &address)) != -1)
{
resolve(address);
total_ref++;
}
free_mem(free_list_head);
free_mem(res_set_head);
display_stats();
return;
}
/* resolve address reference
** if page table entry valid - do nothing
** if page table entry invalid - find a physical frame for this page,
** update the pte for the page and count a page fault
**
** address must be a 16-bit value (0x0000..0xFFFF); its upper byte is the
** virtual page number.  Anything else would index outside the page table,
** so it is reported on stderr and ignored.
*/
void resolve(int address)
{
    int virt_page;

    if (address < 0 || address > 0xFFFF)
    {
        fprintf(stderr, "Ignoring out-of-range address 0x%x\n", (unsigned int)address);
        return;
    }

    virt_page = address >> 8;
    if (pte[virt_page].valid == 1)
    {
        /* page already has a frame: nothing to do */
        return;
    }

    /* page fault: the entry stays invalid while find_frame() runs, so a
    ** possible eviction cannot pick this page's stale frame value. */
    pte[virt_page].frame = find_frame();
    pte[virt_page].valid = 1;
    total_fault++;
}
/* find_frame:
** hand out a physical frame for a faulting page.
** When the free list has no entries left, a victim is chosen from the
** resident set; otherwise the last entry of the free list is unlinked,
** appended to the resident set, and its frame number returned.
*/
unsigned short find_frame()
{
    list taken, before;

    /* only the sentinel is left: free list empty */
    if (free_list_head->prev == free_list_head)
    {
        return find_victim();
    }

    /* unlink the tail entry of the free list */
    taken = free_list_head->prev;
    before = taken->prev;
    before->next = free_list_head;
    free_list_head->prev = before;

    to_resident_set(taken);
    return taken->frame;
}
/* to_resident_set:
** link an (already detached) list entry in as the new last element of the
** resident set, i.e. between the current tail and the sentinel.
*/
void to_resident_set(list current)
{
    list head = res_set_head;
    list old_tail = head->prev;

    old_tail->next = current;
    current->next = head;
    current->prev = old_tail;
    head->prev = current;
}
/* find_victim:
** FIFO replacement: the victim is the first entry of the resident set.
** The page currently mapped to its frame is invalidated, the entry is
** moved to the end of the resident set, and its frame number is returned.
** Your task is to replace this with a more efficient strategy.
**
** If the resident set is empty there is nothing to evict; frame 0 is
** returned in that case, as this function has always done.
*/
unsigned short find_victim()
{
    list victim = res_set_head->next;

    /* only the sentinel present: resident set empty */
    if (victim == res_set_head)
    {
        return 0;
    }

    invalidate(victim->frame);

    /* unlink from the front ... */
    res_set_head->next = victim->next;
    victim->next->prev = res_set_head;
    /* ... and re-queue at the back */
    to_resident_set(victim);

    return victim->frame;
}
/* invalidate:
** clear the first valid page table entry that maps the given frame:
** the entry is marked invalid and its frame field is reset to -1.
** Does nothing if no valid entry maps the frame. */
void invalidate(unsigned short frame)
{
    int page = 0;

    while (page < MAX_PAGE)
    {
        if (pte[page].valid == 1 && pte[page].frame == frame)
        {
            pte[page].frame = -1;
            pte[page].valid = 0;
            break;
        }
        page++;
    }
}
/* display_stats:
** print the number of memory references, the number of page faults and
** the page fault rate as a whole percentage.
** This is very basic, you may want to make it more sophisticated,
** for example save the data from multiple runs into a file for
** comparison etc
*/
void display_stats()
{
    printf("\nProcess issued %d memory references\n", total_ref);
    printf("Process triggered %d page faults\n", total_fault);
    if (total_ref == 0)
    {
        /* empty trace: avoid dividing by zero */
        printf("Page fault rate is not defined (no memory references)\n");
        return;
    }
    printf("Page fault rate is %d percent\n", ((total_fault*100)/total_ref));
}
/* free_mem:
** free every entry of a circular list, including its sentinel head.
** head must not be used after this call.  A NULL head is ignored.
**
** The list is walked through the next pointers starting after the
** sentinel; each entry's successor is saved before the entry is freed.
*/
void free_mem(list head)
{
    list current, following;

    if (head == NULL)
    {
        return;
    }

    for (current = head->next; current != head; current = following)
    {
        following = current->next;
        free(current);
    }
    free(head);
}
The most obvious problem lies in the input to your algorithm.
The restpage array is a global array and will thus be initialised to contain only the value 0. You then use these array elements as the page-numbers you are requesting, which means that your algorithm processes only requests for page 0 if mem_size < 100.
And if mem_size >= 100, you are overrunning the array bounds and land squarely in the land of undefined behaviour.
There are two fixes you need to make:
Just as you are checking for a valid file in the command-line arguments, you must also check that mem_size is not too large
Write an additional loop to give each element in restpage a random value, to ensure not all page requests are for the same page.
You have dimensioned restpage as [100], but mem_size appears to be freely configurable. Is this the intent?
mem_size = atoi(argv[2]);
fclose(stream);
..
for(i=0;i<mem_size;i++)
{
totalabsence+=find_victim(&pt,restpage[i]);
}
EDIT:
I see one bug in your new code: in find_victim you don't initialize the local variable 'frame'.
EDITx2:
When you read from the file, you may want to put just one hex address on each line
and use fgets() instead to read the file line by line (or load the whole file and
go through it line by line).