Getting the actual executable path of current process context - Linux kernel - c

I'm trying to get the actual executable path of a running process through my kernel driver.
I've done the following:
static struct kretprobe do_fork_probe = {
.entry_handler = (kprobe_opcode_t *) process_entry_callback,
.handler = (kprobe_opcode_t *) NULL,
.maxactive = 1000,
.data_size = 0
};
do_fork_probe.kp.addr = (kprobe_opcode_t*)kallsyms_lookup_name("do_fork");
if ((ret = register_kretprobe(&do_fork_probe)) < 0)
return -1;
/*
 * kretprobe entry handler: logs the executable path reported by
 * executable_path() for the task that hit the probe.
 * Always returns 0.
 */
static int process_entry_callback(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	char *exe_path = executable_path(current);

	printk("Executable path = %s\n", exe_path);
	return 0;
}
The executable_path function:
/*
 * Return the full path of the executable mapped by @process.
 *
 * @process: task whose executable is looked up (the original code ignored
 *           this argument and always inspected `current`).
 *
 * Returns a NUL-terminated string in a kmalloc'ed buffer, or NULL when the
 * task has no mm / exe_file, the allocation fails, or d_path() reports an
 * error. The returned pointer is the start of the allocation, so the caller
 * owns it and releases it with kfree().
 */
char* executable_path(struct task_struct* process)
{
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
	char *p = NULL;
	char *pathname;
	/* Pin the mm so it cannot go away while we look at it. */
	struct mm_struct *mm = get_task_mm(process);

	if (!mm)
		return NULL;

	pathname = kmalloc(PATH_MAX, GFP_ATOMIC);
	if (!pathname)
		goto out;

	down_read(&mm->mmap_sem);
	if (mm->exe_file)
		p = d_path(&mm->exe_file->f_path, pathname, PATH_MAX);
	up_read(&mm->mmap_sem);

	if (IS_ERR_OR_NULL(p)) {
		/* No exe_file, or d_path() failed (e.g. path too long). */
		kfree(pathname);
		p = NULL;
	} else if (p != pathname) {
		/*
		 * d_path() builds the string at the end of the buffer; slide
		 * it to the front so the caller can kfree() what we return.
		 */
		memmove(pathname, p, strlen(p) + 1);
		p = pathname;
	}
out:
	mmput(mm);
	return p;
}
The problem is that if I run an executable using bash as follows:
./execname
I'm getting the following output:
Executable path = /bin/bash
While what I really want is the : execname (Actually its full path but lets start with the name)
Any suggestions?

It is unclear what you try to get, so here are list of options:
execname as it is considered by SystemTap. Simple process->comm should suffice. That is how comm field defined in Kernel:
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
it with task_lock())
- initialized normally by setup_new_exec */
But if bash is a symlink, then comm should contain the symlink's name, not the real executable name.
argv[0], the first element of the command-line arguments array as seen by the application (which may alter it). There is a get_cmdline() function in the kernel, but it does not seem to be exported.
Basename of full path. In this case, do not call d_path, just take d_name field of dentry:
/* f_path is an embedded struct path; the name string is dentry->d_name.name. */
strlcpy(pathname, mm->exe_file->f_path.dentry->d_name.name, PATH_MAX);
But it sounds like an XY problem. Are you trying to get executable names for all forking processes? Why not use SystemTap directly?
# stap -v -e 'probe scheduler.process_fork { println(execname()); }'

Related

Linux : setting locale at runtime and interprocess dependencies

I am stuck in a strange problem.
I have two scripts (C program executables) running on ARM linux machine that are mounting the same USB device (containing chinese character filenames) on two different paths, as soon as the device is inserted.
/*
 * Prototype of mount(). The last parameter, data, is the argument the two
 * scripts in this question pass differently ("utf8" versus 0).
 */
int mount(const char *source, const char *target,
const char *filesystemtype, unsigned long mountflags,
const void *data);
In the last parameter,
Script A passes "utf8" and Script B passes 0.
So, as soon as I insert the USB device, the scripts race to mount the device.
If Script A mounts first (which passes utf8 parameter), I get proper filenames. This is the mount command output [Notice that even second mount has utf8 as parameter, even if its not passed. Why?]
/dev/sdb1 on /home/root/script1 type vfat (ro,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed,utf8,errors=remount-r
o)
/dev/sdb1 on /home/root/script2 type vfat (ro,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed
,utf8,errors=remount-ro)
But if script B mounts first(which passes 0 as last parameter to mount), I get broken filenames ?????.mp3 from readdir(). This is the mount command output.
/dev/sdb1 on /home/root/script2 type vfat (ro,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed,errors=remount-ro)
/dev/sdb1 on /home/root/script1 type vfat (ro,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed
,errors=remount-ro)
EDIT
This is the basic mount code of both the scripts developed for testing(only difference in last mount argument). Both scripts are executed immediately on reboot using a service.
// Mount the device: try /dev/sda1, then /dev/sdb1, then /dev/sdc1 on the
// same target, stopping at the first mount() that returns 0.
// NOTE(review): mountflags is the literal 1 - presumably MS_RDONLY (the mount
// output shows "ro"); confirm against <sys/mount.h>.
ret = mount("/dev/sda1", "/home/root/script1/", "vfat", 1, "utf8");
if (ret == 0) {
fprintf(stdout,"mount() succeeded.\n");
// Keep the process alive after a successful mount.
sleep(2000);
} else {
// First device failed (no message is printed for it); try the second one.
ret = mount("/dev/sdb1", "/home/root/script1/", "vfat", 1, "utf8");
if(ret == 0)
{
fprintf(stdout,"mount() succeeded\n");
sleep(2000);
}
else
{
// Report why the second device failed, then try the third and last one.
fprintf(stdout,"/dev/sdb1 mount() failed: %d, %s\n", errno, strerror(errno));
ret = mount("/dev/sdc1", "/home/root/script1/", "vfat", 1, "utf8");
if(ret == 0)
{
fprintf(stdout,"mount() succeeded\n");
sleep(2000);
}
else
fprintf(stdout,"mount() failed: %d, %s\n", errno, strerror(errno));
}
}
Generally speaking, you should never mount same filesystem twice -- if OS drivers will decide to write twice to the same block, you'll get filesystem corruption. Use bind-mounts in such cases.
Linux, however, is smart enough to help you with that -- it will reuse the older mount's super_block (with all of its mount options) when the same filesystem is mounted at another location.
I couldn't find it in documentation, but it is traceable through kernel source in sget() which is called by mount_bdev():
/* Walk the super_blocks already linked on this filesystem type's fs_supers list. */
hlist_for_each_entry(old, &type->fs_supers, s_instances) {
/* Skip entries that the caller-supplied test() callback rejects for this data. */
if (!test(old, data))
continue;
/* The matching super_block could not be grabbed: jump to the retry label (outside this excerpt). */
if (!grab_super(old))
goto retry;
/* A fresh super_block s had already been prepared but is not needed: unlock and destroy it. */
if (s) {
up_write(&s->s_umount);
destroy_super(s);
s = NULL;
}
/* Hand back the existing super_block instead of a new one. */
return old;
}
In this snippet it'll seek for previous instance of super_block corresponding to a block device, and if it already exists -- simply returns it.
Some practical proof using SystemTap:
# stap -e 'probe kernel.function("sget").return {
/* Print, for every super_block returned by sget(), its address, its
   active count and the fs_uid mount option (-1 when s_fs_info is unset).
   The cast operator is @cast; a leading # would start a comment. */
sb = $return;
active = @cast(sb, "super_block")->s_active->counter;
fsi = @cast(sb, "super_block")->s_fs_info;
uid = fsi == 0 ? -1
: @cast(fsi, "msdos_sb_info", "vfat")->options->fs_uid;
printf("%p active=%d uid=%d\n", sb, active, uid);
}'
Setting uid in second mount doesn't alter option, but increases number of active mounts (obvious):
# mount /dev/sdd1 /tmp/mnt1
0xffff8803ce87e800 active=1 uid=-1
# mount -o uid=1000 /dev/sdd1 /tmp/mnt2
0xffff8803ce87e800 active=2 uid=0
Mounting in reverse order also inherits mount options:
# mount -o uid=1000 /dev/sdd1 /tmp/mnt2
0xffff8803cc609c00 active=1 uid=-1
# mount /dev/sdd1 /tmp/mnt1
0xffff8803cc609c00 active=2 uid=1000
If you wish to know who was responsible for such behavior, ask Linus; similar code has existed since 0.11:
/*
 * Find the super_block whose s_dev equals dev.
 * Returns NULL when dev is 0 or when no entry of the super_block table
 * (NR_SUPER entries) matches.
 */
struct super_block * get_super(int dev)
{
struct super_block * s;
if (!dev)
return NULL;
/* Start scanning at the first table entry. */
s = 0+super_block;
while (s < NR_SUPER+super_block)
if (s->s_dev == dev) {
/* Wait on the entry, then re-check that it still belongs to dev. */
wait_on_super(s);
if (s->s_dev == dev)
return s;
/* The entry changed while waiting: restart the scan from the beginning. */
s = 0+super_block;
} else
s++;
return NULL;
}
(but when this code was in charge, sys_mount() explicitly checked that no other mountpoints exist for that superblock).
You can possibly try to ask a question at LKML.

Find pathname from dlopen handle on OSX

I have dlopen()'ed a library, and I want to invert back from the handle it passes to me to the full pathname of shared library. On Linux and friends, I know that I can use dlinfo() to get the linkmap and iterate through those structures, but I can't seem to find an analogue on OSX. The closest thing I can do is to either:
Use dyld_image_count() and dyld_get_image_name(), iterate over all the currently opened libraries and hope I can guess which one corresponds to my handle
Somehow find a symbol that lives inside of the handle I have, and pass that to dladdr().
If I have apriori knowledge as to a symbol name inside of the library I just opened, I can dlsym() that and then use dladdr(). That works fine. But in the general case where I have no idea what is inside this shared library, I would need to be able to enumerate symbols to do that, which I don't know how to do either.
So any tips on how to lookup the pathname of a library from its dlopen handle would be very much appreciated. Thanks!
Here is how you can get the absolute path of a handle returned by dlopen.
In order to get the absolute path, you need to call the dladdr function and retrieve the Dl_info.dli_fname field.
In order to call the dladdr function, you need to give it an address.
In order to get an address given a handle, you have to call the dlsym function with a symbol.
In order to get a symbol out of a loaded library, you have to parse the library to find its symbol table and iterate over the symbols. You need to find an external symbol because dlsym only searches for external symbols.
Put it all together and you get this:
#import <dlfcn.h>
#import <mach-o/dyld.h>
#import <mach-o/nlist.h>
#import <stdio.h>
#import <string.h>
/*
 * Width-neutral aliases for the Mach-O structures used below: the *_64
 * variants are selected when __LP64__ is defined, the plain ones otherwise.
 */
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct nlist_64 nlist_t;
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct nlist nlist_t;
#endif
/*
 * Return the name of the first symbol-table entry of the image at `header`
 * that has the N_EXT bit set and a non-zero n_value, or NULL when dladdr()
 * does not recognise `header`, a required load command is missing, or no
 * such symbol exists. The returned pointer refers into the image's string
 * table; it is not a copy.
 */
static const char * first_external_symbol_for_image(const mach_header_t *header)
{
Dl_info info;
/* Only used as a validity check on header; info itself is not read. */
if (dladdr(header, &info) == 0)
return NULL;
segment_command_t *seg_linkedit = NULL;
segment_command_t *seg_text = NULL;
struct symtab_command *symtab = NULL;
/* Load commands start right after the header; step through them by cmdsize. */
struct load_command *cmd = (struct load_command *)((intptr_t)header + sizeof(mach_header_t));
for (uint32_t i = 0; i < header->ncmds; i++, cmd = (struct load_command *)((intptr_t)cmd + cmd->cmdsize))
{
switch(cmd->cmd)
{
case LC_SEGMENT:
case LC_SEGMENT_64:
/* Remember the SEG_TEXT and SEG_LINKEDIT segment commands. */
if (!strcmp(((segment_command_t *)cmd)->segname, SEG_TEXT))
seg_text = (segment_command_t *)cmd;
else if (!strcmp(((segment_command_t *)cmd)->segname, SEG_LINKEDIT))
seg_linkedit = (segment_command_t *)cmd;
break;
case LC_SYMTAB:
symtab = (struct symtab_command *)cmd;
break;
}
}
/* All three load commands are needed to locate the symbol and string tables. */
if ((seg_text == NULL) || (seg_linkedit == NULL) || (symtab == NULL))
return NULL;
/*
 * Correction added to the symtab file offsets before they are applied to
 * header. NOTE(review): presumably this assumes header is where SEG_TEXT is
 * mapped - confirm.
 */
intptr_t file_slide = ((intptr_t)seg_linkedit->vmaddr - (intptr_t)seg_text->vmaddr) - seg_linkedit->fileoff;
intptr_t strings = (intptr_t)header + (symtab->stroff + file_slide);
nlist_t *sym = (nlist_t *)((intptr_t)header + (symtab->symoff + file_slide));
for (uint32_t i = 0; i < symtab->nsyms; i++, sym++)
{
/* Skip entries that are not external or have a zero value. */
if ((sym->n_type & N_EXT) != N_EXT || !sym->n_value)
continue;
return (const char *)strings + sym->n_un.n_strx;
}
return NULL;
}
/*
 * Return the file name (Dl_info.dli_fname) of the library behind a dlopen()
 * handle, or NULL when it cannot be determined.
 *
 * For each loaded image, the first external symbol is looked up in `handle`
 * with dlsym(); the first symbol that resolves is passed to dladdr() to get
 * the file name. The returned string is owned by the dynamic loader.
 */
const char * pathname_for_handle(void *handle)
{
	/*
	 * Set the low bit in order to trigger findExportedSymbol instead of
	 * findExportedSymbolInImageOrDependentImages. See `dlsym` implementation at
	 * http://opensource.apple.com/source/dyld/dyld-239.3/src/dyldAPIs.cpp
	 */
	void *exact_handle = (void *)((intptr_t)handle | 1);

	/* Valid image indices are 0 .. count-1; starting at count was off by one. */
	for (int32_t i = (int32_t)_dyld_image_count() - 1; i >= 0; i--)
	{
		const char *first_symbol = first_external_symbol_for_image((const mach_header_t *)_dyld_get_image_header(i));
		if (first_symbol == NULL || strlen(first_symbol) <= 1)
			continue;

		/* Skip the first character in order to remove the leading underscore. */
		void *address = dlsym(exact_handle, first_symbol + 1);
		if (address == NULL)
			continue; /* symbol not found through this handle */

		Dl_info info;
		if (dladdr(address, &info))
			return info.dli_fname;
	}
	return NULL;
}
/*
 * Demo: open libxml2 and print the path that pathname_for_handle() reports
 * for it. Returns 0 on success and 1 when the library cannot be opened.
 */
int main(int argc, const char * argv[])
{
	void *libxml2 = dlopen("libxml2.dylib", RTLD_LAZY);
	if (libxml2 == NULL)
	{
		fprintf(stderr, "dlopen failed: %s\n", dlerror());
		return 1;
	}

	/* Passing NULL to %s is undefined, so substitute a placeholder. */
	const char *path = pathname_for_handle(libxml2);
	printf("libxml2 path: %s\n", path ? path : "(unknown)");

	dlclose(libxml2);
	return 0;
}
If you run this code, it will yield the expected result: libxml2 path: /usr/lib/libxml2.2.dylib
After about a year of using the solution provided by 0xced, we discovered an alternative method that is simpler and avoids one (rather rare) failure mode; specifically, because 0xced's code snippet iterates through each dylib currently loaded, finds the first exported symbol, attempts to resolve it in the dylib currently being sought, and returns positive if that symbol is found in that particular dylib, you can have false positives if the first exported symbol from an arbitrary library happens to be present inside of the dylib you're currently searching for.
My solution was to use _dyld_get_image_name(i) to get the absolute path of each image loaded, dlopen() that image, and compare the handle (after masking out any mode bits set by dlopen() due to usage of things like RTLD_FIRST) to ensure that this dylib is actually the same file as the handle passed into my function.
The complete function can be seen here, as a part of the Julia Language, with the relevant portion copied below:
// Iterate through all images currently in memory.
// Valid image indices are 0 .. count-1, so start at count-1 (starting at
// count asked dyld for an image that does not exist).
for (int32_t i = (int32_t)_dyld_image_count() - 1; i >= 0 ; i--) {
    // dlopen() each image, check handle
    const char *image_name = _dyld_get_image_name(i);
    uv_lib_t *probe_lib = jl_load_dynamic_library(image_name, JL_RTLD_DEFAULT);
    void *probe_handle = probe_lib->handle;
    uv_dlclose(probe_lib);
    // If the handle is the same as what was passed in (modulo mode bits), return this image name
    if (((intptr_t)handle & (-4)) == ((intptr_t)probe_handle & (-4)))
        return image_name;
}
Note that functions such as jl_load_dynamic_library() are wrappers around dlopen() that return libuv types, but the spirit of the code remains the same.

Porting Unix to Windows- usage of pwd.h

I'm trying to compile libUnihan code with MinGW, but have run into a function which requires porting. The purpose of the function is to get a canonical path representation. It uses pwd.h (which is POSIX, and MinGW isn't) so it can account for the use of '~' to mean the home directory by retrieving a passwd struct, which contains pw_dir. I did find a little information here, and a port of realpath here, but I am still entirely at a loss as to how to deal with this. With MinGW, I still have a home directory represented by ~ and located at /home/nate, but since it isn't POSIX, I don't have pwd.h to help me find where this home directory is.
Q: How can I port the function below to work properly with MinGW?
/**
 * Return the canonicalized absolute pathname.
 *
 * It works exactly the same as realpath(3), except this function can handle a path starting with ~,
 * which realpath cannot. "~" and "~/..." refer to the current user's home directory,
 * "~name" and "~name/..." to the home directory of user "name".
 *
 * @param path The path to be resolved.
 * @param resolved_path Buffer for holding the resolved_path.
 * @return resolved path, NULL if the resolution is not successful (including when
 *         path is NULL or the path does not fit into PATH_MAX bytes).
 */
gchar*
truepath(const gchar *path, gchar *resolved_path){
    gchar workingPath[PATH_MAX];
    gchar fullPath[PATH_MAX];
    gchar *result=NULL;

    if (path == NULL)
        return NULL;
    /* Refuse to resolve a silently truncated copy of the path. */
    if (g_strlcpy(workingPath,path,PATH_MAX) >= PATH_MAX)
        return NULL;

    if ( workingPath[0] != '~' ){
        result = realpath(workingPath, resolved_path);
    }else{
        gchar *firstSlash;
        const gchar *suffix;
        const gchar *homeDirStr = NULL;
        struct passwd *pw;

        firstSlash = strchr(workingPath, DIRECTORY_SEPARATOR);
        if (firstSlash == NULL)
            suffix = "";
        else
        {
            *firstSlash = 0; // so userName is null terminated
            suffix = firstSlash + 1;
        }

        /* "~" alone means the current user; "~name" means user "name". */
        if (workingPath[1] == '\0')
            pw = getpwuid( getuid() );
        else
            pw = getpwnam( &workingPath[1] );
        if (pw != NULL)
            homeDirStr = pw->pw_dir;

        if (homeDirStr != NULL){
            /* Bounded formatting: the home directory plus suffix may exceed PATH_MAX. */
            gint ret=g_snprintf(fullPath, sizeof(fullPath), "%s%c%s", homeDirStr, DIRECTORY_SEPARATOR, suffix);
            if (ret>0 && (gsize)ret < sizeof(fullPath)){
                result = realpath(fullPath, resolved_path);
            }
        }
    }
    return result;
}
The purpose is to implement ~[username]/ remapping logic. This sort of code makes sense in Linux/UNIX environments, but the most common use is just to refer to the user's own home directory.
For expediency, I'd just add support for the common case - ~/ - i.e. the current user, and not bother supporting the more general case - have it fail with an obvious error in that case.
The function to get the current user's home directory is SHGetFolderPath.
#include <windows.h>
#include <shlobj.h> /* declares SHGetFolderPath and the CSIDL_* constants */
char homeDirStr[MAX_PATH];
/*
 * The buffer is a char array, so call the ANSI variant explicitly; the
 * SHGetFolderPath macro expands to the wide-character variant when UNICODE
 * is defined.
 * NOTE(review): CSIDL_PERSONAL names the user's documents folder; if the
 * profile root is wanted instead, CSIDL_PROFILE is the constant to check.
 */
if (SUCCEEDED(SHGetFolderPathA(NULL, CSIDL_PERSONAL, NULL, 0, homeDirStr))) {
// Do something with the path
} else {
// Do something else
}
In the case of a failed lookup of the user, the code you pasted does not try to replace that string, but simply returns NULL, so you could emulate that.

a recursive function to manipulate a given path

I am working on modifying the didactic OS xv6 (written in c) to support symbolic links (AKA shortcuts).
A symbolic link is a file of type T_SYM that contains a path to its destination.
For doing that, i wrote a recursive function that gets a path and a buffer and fills the buffer with the "real" path (i.e. if the path contains a link, it should be replaced by the real path, and a link can occur at any level in the path).
Basically, if i have a path a/b/c/d, and a link from f to a/b, the following operations should be equivalent:
cd a/b/c/d
cd f/c/d
Now, the code is written, but the problem that i try to solve is the problem of starting the path with "/" (meaning that the path is absolute and not relative).
Right now, if i run it with a path named /dir1 it treats it like dir1 (relative instead of absolute).
This is the main function, it calls the recursive function.
pathname is the given path, buf will contain the real path.
/*
 * Resolve pathname with get_real_path() and copy the resulting path into buf.
 *
 * At most bufsize bytes are copied and no terminating NUL is written.
 * Returns the number of bytes placed in buf, or -1 when the path cannot be
 * resolved.
 */
int readlink(char *pathname, char *buf, size_t bufsize){
  char name[DIRSIZ];
  char realpathname[100];
  struct inode *ip;
  size_t len;

  memset(realpathname, 0, sizeof(realpathname));
  realpathname[0] = '/';

  /* get_real_path() takes four arguments; the original call passed five. */
  ip = get_real_path(pathname, name, realpathname, 0);
  if(ip == 0)
    return -1;
  /* Only the path text is needed here, so drop the inode reference. */
  iput(ip);

  len = strlen(realpathname);
  if(len > bufsize)
    len = bufsize; /* never write past the caller's buffer */
  memmove(buf, realpathname, len);
  return len;
}
This is the recursive part.
the function returns an inode structure (which represents a file or directory in the system). it builds the real path inside realpath.
ilock and iunlock are used to access the inode safely.
/*
 * Walk path one element at a time, following symbolic links, and build the
 * link-free path text in realpath starting at index position.
 *
 * path      path to resolve; a leading '/' makes the walk start at the root
 *           directory, otherwise it starts at the current working directory
 * name      scratch buffer that receives each path element from skipelem()
 * realpath  output buffer for the resolved path text; its size is not known
 *           here, so the caller must supply one that is large enough
 * position  index in realpath at which writing starts
 *
 * Returns the unlocked inode of the last element (the caller must iput() it),
 * or 0 on failure. On both paths realpath is left NUL-terminated without a
 * trailing '/'.
 *
 * NOTE(review): a cycle of symbolic links recurses without bound; limiting
 * the depth needs an extra parameter and is not done here.
 */
struct inode* get_real_path(char *path, char *name, char* realpath, int position){
  struct inode *ip, *next;
  char buf[100];
  char newpath[100];
  char *elem; // text appended for the current element

  if(*path == '/'){
    ip = iget(ROOTDEV, ROOTINO); // ip gets the root directory
    realpath[position++] = '/';  // keep absolute paths absolute
  } else
    ip = idup(proc->cwd); // ip gets the current working directory

  // name will get the next directory in the path, path will get the rest of the directories
  while((path = skipelem(path, name)) != 0){
    ilock(ip);
    if(ip->type != T_DIR){ // ip is not a directory: nothing to look up in it
      iunlockput(ip);
      ip = 0;
      break;
    }
    if((next = dirlookup(ip, name, 0)) == 0){ // next will get the inode of the next directory
      iunlockput(ip);
      ip = 0;
      break;
    }
    iunlock(ip);

    elem = name;
    ilock(next);
    if(next->type == T_SYM){ // next is a symbolic link
      if(next->size >= sizeof(buf)){ // link target would overflow buf
        iunlockput(next);
        iput(ip);
        ip = 0;
        break;
      }
      readi(next, buf, 0, next->size); // buf contains the path stored in the symbolic link
      buf[next->size] = 0;
      iunlockput(next);
      next = get_real_path(buf, name, newpath, 0); // call it recursively (might still be a symbolic link)
      if(next == 0){
        iput(ip);
        ip = 0;
        break;
      }
      // Append the resolved target through a separate pointer. Rebinding
      // name to newpath made later skipelem() calls and recursive calls
      // share one buffer and corrupt the result.
      elem = newpath;
      position = 0; // the link target replaces what was built so far
    }
    else
      iunlock(next);

    memmove(realpath + position, elem, strlen(elem));
    position += strlen(elem);
    if(position > 0 && realpath[position-1] != '/')
      realpath[position++] = '/';
    realpath[position] = '\0';
    iput(ip);
    ip = next;
  }

  // Drop the trailing '/', but keep a lone "/" and never index below 0.
  if(position > 1 && realpath[position-1] == '/')
    position--;
  realpath[position] = '\0';
  return ip;
}
I have tried many ways to do it right but with no success. If anyone sees the problem, i'd be happy to hear the solution.
Thanks,
Eyal
I think it's clear that after running get_real_path(pathname, name, realpathname, 0, 0) the realpathname cannot possibly start with a slash.
Provided the function executes successfully, the memmove(realpath + position, name, strlen(name)) ensures that realpath starts with name, as the position variable always contains zero at the first invocation of memmove.
I'd suggest something like
if(*path != '/') {
  // relative path: start from the current working directory
  ip = idup(proc->cwd);
} else {
  // absolute path: start from the root and record the leading slash
  ip = iget(ROOTDEV, ROOTINO);
  realpath[position++] = '/';
}
P.S. I'm not sure why you put a slash into the realpathname before executing the get_real_path, since at this point you don't really know whether the path provided is an absolute one.
Ok, found the problem...
The problem was deeper than what i thought...
Somehow the realpath was changed sometimes with no visible reason... but the reason was the line:
name = newpath;
the solution was to change that line to
strcpy(name,newpath);
the previous line made a binding between the name and the realpath... which can be ok if we were not dealing with softlinks. When dereferencing a subpath, this binding ruined everything.
Thanks for the attempts

How to return a clean error on incorrect mount with VTreeFS?

When trying to mount a VTreeFS filesystem with a set of arguments (by using options -o when mounting) we want to let it fail cleanly if the user doesn't use the predefined arguments correctly. Currently we get this nasty error message when we do not mount the filesystem and let the main return 0. We basically want the filesystem to not be mounted if the arguments are incorrect.
Current situations
mount -t filesystemtest -o testarguments none /mnt/filesystemtest
Arguments invalid
RS: service 'fs_00021' exited uring initialization
filesystemtest 109710 0xab6e 0x65f1 0x618d 0x6203 0x98ba 0x1010
Request to RS failed: unknown error (error 302)
mount: couldn't run /bin/sercie up /sbin/filesystemtest -label 'fs_00021'-args ''
mount: Can't mount none on /mnt/filesystemtest/: unknown error
Preferred situation
mount -t filesystemtest -o testarguments none /mnt/filesystemtest
Arguments invalid
Basically we want to know how to return a clean error message when not calling start_vtreefs, as shown below. The example below is not our actual code and doesn't actually use arguments, but as an example there should be a way to make this piece of code always fail. (sorry for that):
#include <minix/drivers.h>
#include <minix/vtreefs.h>
#include <sys/stat.h>
#include <time.h>
#include <assert.h>
static void my_init_hook(void)
{
/* This hook will be called once, after VTreeFS has initialized.
*/
struct inode_stat file_stat;
struct inode *inode;
/* We create one regular file in the root directory. The file is
* readable by everyone, and owned by root. Its size as returned by for
* example stat() will be zero, but that does not mean it is empty.
* For files with dynamically generated content, the file size is
* typically set to zero.
*/
file_stat.mode = S_IFREG | 0444;
file_stat.uid = 0;
file_stat.gid = 0;
file_stat.size = 0;
file_stat.dev = NO_DEV;
/* Now create the actual file. It is called "test" and does not have an
* index number. Its callback data value is set to 1, allowing it to be
* identified with this number later.
*/
inode = add_inode(get_root_inode(), "test", NO_INDEX, &file_stat, 0,
(cbdata_t) 1);
assert(inode != NULL);
}
/*
 * Read hook, called every time a regular file is read. It dynamically
 * generates the file content: the current time as formatted by ctime().
 *
 * inode   inode being read (unused; there is only one file)
 * offset  position in the file at which the read starts
 * ptr     out: set to the first byte to return, inside a static buffer
 * len     in: number of bytes requested; out: number of bytes available
 * cbdata  callback data of the file; must be 1
 *
 * Always returns OK. *len is set to 0 to signal end of file, which is also
 * what happens for a negative offset or when no time string is available.
 */
static int my_read_hook(struct inode *inode, off_t offset, char **ptr,
	size_t *len, cbdata_t cbdata)
{
	/* 'data' has to be static, because it is used after this function
	 * returns.
	 */
	static char data[26];
	const char *str;
	size_t data_len, remaining;
	time_t now;

	/* We have only a single file. With more files, cbdata may help
	 * distinguishing between them.
	 */
	assert((int) cbdata == 1);

	/* Generate the contents of the file into the 'data' buffer. ctime()
	 * may return NULL, and its result must fit in 'data' with its
	 * terminator, so check both before copying.
	 */
	time(&now);
	str = ctime(&now);
	if (str == NULL || strlen(str) >= sizeof(data)) {
		*len = 0;
		return OK;
	}
	strcpy(data, str);
	data_len = strlen(data);

	/* If the offset is negative or beyond the end of the string, return
	 * EOF. A negative offset would otherwise point before 'data'.
	 */
	if (offset < 0 || (size_t) offset > data_len) {
		*len = 0;
		return OK;
	}

	/* Otherwise, return a pointer into 'data' and bound the returned
	 * length to the length of the rest of the string.
	 */
	*ptr = data + offset;
	remaining = data_len - (size_t) offset;
	if (*len > remaining)
		*len = remaining;

	return OK;
}
/* The table with callback hooks. Entries are positional; only the init and
 * read hooks are provided, every other slot is left NULL.
 */
struct fs_hooks my_hooks = {
my_init_hook, /* init_hook: adds the "test" file */
NULL, /* cleanup_hook */
NULL, /* lookup_hook */
NULL, /* getdents_hook */
my_read_hook, /* read_hook: generates the file content */
NULL, /* rdlink_hook */
NULL /* message_hook */
};
/*
 * Entry point of the file system server.
 *
 * When no argument is given (argc == 1) the arguments are reported as
 * invalid and the process exits with status 1 without starting VTreeFS.
 * Otherwise the root inode is described and VTreeFS is started.
 */
int main(int argc, char* argv[])
{
	struct inode_stat root_stat;

	if (argc == 1) {
		/* Report the problem on its own line and signal failure through
		 * the exit status instead of returning success.
		 */
		printf("Arguments invalid. (pass option with -o)\n");
		return 1;
	}

	/* Fill in the details to be used for the root inode. It will be a
	 * directory, readable and searchable by anyone, and owned by root.
	 */
	root_stat.mode = S_IFDIR | 0555;
	root_stat.uid = 0;
	root_stat.gid = 0;
	root_stat.size = 0;
	root_stat.dev = NO_DEV;

	/* Now start VTreeFS. Preallocate 10 inodes, which is more than we'll
	 * need for this example. No indexed entries are used.
	 */
	start_vtreefs(&my_hooks, 10, &root_stat, 0);

	/* The call above never returns. This just keeps the compiler happy. */
	return 0;
}

Resources