I'm working on a heap profiler for Linux, called heaptrack. Currently, I rely on LD_PRELOAD to overload various (de-)allocation functions, and that works extremely well.
Now I would like to extend the tool to allow runtime attaching to an existing process, which was started without LD_PRELOADing my tool. I can dlopen my library via GDB just fine, but that won't overwrite malloc etc. I think, this is because at that point the linker already resolved the position dependent code of the already running process - correct?
So what do I do instead to overload malloc and friends?
I am not proficient with assembler code. From what I've read so far, I guess I'll somehow have to patch malloc and the other functions, such that they first call back to my trace function and then continue with their actual implementation? Is that correct? How do I do that?
I hope there are existing tools out there, or that I can leverage GDB/ptrace for that.
Just for the lulz, another solution without ptracing your own process or touching a single line of assembly or playing around with /proc. You only have to load the library in the context of the process and let the magic happen.
The solution I propose is to use the constructor feature (brought from C++ to C by gcc) to run some code when a library is loaded. Then this library just patch the GOT (Global Offset Table) entry for malloc. The GOT stores the real addresses for the library functions so that the name resolution happen only once. To patch the GOT you have to play around with the ELF structures (see man 5 elf). And Linux is kind enough to give you the aux vector (see man 3 getauxval) that tells you where to find in memory the program headers of the current program. However, better interface is provided by dl_iterate_phdr, which is used below.
Here is an example code of library that does exactly this when the init function is called. Although the same could probably be achieved with a gdb script.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include <sys/auxv.h>
#include <elf.h>
#include <link.h>
#include <sys/mman.h>
struct strtab {
char *tab;
ElfW(Xword) size;
};
struct jmpreltab {
ElfW(Rela) *tab;
ElfW(Xword) size;
};
struct symtab {
ElfW(Sym) *tab;
ElfW(Xword) entsz;
};
/* Backup of the real malloc function */
static void *(*realmalloc)(size_t) = NULL;
/* My local versions of the malloc functions */
static void *mymalloc(size_t size);
/*************/
/* ELF stuff */
/*************/
static const ElfW(Phdr) *get_phdr_dynamic(const ElfW(Phdr) *phdr,
uint16_t phnum, uint16_t phentsize) {
int i;
for (i = 0; i < phnum; i++) {
if (phdr->p_type == PT_DYNAMIC)
return phdr;
phdr = (ElfW(Phdr) *)((char *)phdr + phentsize);
}
return NULL;
}
static const ElfW(Dyn) *get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn,
uint32_t type) {
ElfW(Dyn) *dyn;
for (dyn = (ElfW(Dyn) *)(base + pdyn->p_vaddr); dyn->d_tag; dyn++) {
if (dyn->d_tag == type)
return dyn;
}
return NULL;
}
static struct jmpreltab get_jmprel(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct jmpreltab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_JMPREL);
table.tab = (dyn == NULL) ? NULL : (ElfW(Rela) *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_PLTRELSZ);
table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static struct symtab get_symtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct symtab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_SYMTAB);
table.tab = (dyn == NULL) ? NULL : (ElfW(Sym) *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_SYMENT);
table.entsz = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static struct strtab get_strtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
struct strtab table;
const ElfW(Dyn) *dyn;
dyn = get_dynentry(base, pdyn, DT_STRTAB);
table.tab = (dyn == NULL) ? NULL : (char *)dyn->d_un.d_ptr;
dyn = get_dynentry(base, pdyn, DT_STRSZ);
table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
return table;
}
static void *get_got_entry(ElfW(Addr) base, struct jmpreltab jmprel,
struct symtab symtab, struct strtab strtab, const char *symname) {
ElfW(Rela) *rela;
ElfW(Rela) *relaend;
relaend = (ElfW(Rela) *)((char *)jmprel.tab + jmprel.size);
for (rela = jmprel.tab; rela < relaend; rela++) {
uint32_t relsymidx;
char *relsymname;
relsymidx = ELF64_R_SYM(rela->r_info);
relsymname = strtab.tab + symtab.tab[relsymidx].st_name;
if (strcmp(symname, relsymname) == 0)
return (void *)(base + rela->r_offset);
}
return NULL;
}
static void patch_got(ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum,
int16_t phentsize) {
const ElfW(Phdr) *dphdr;
struct jmpreltab jmprel;
struct symtab symtab;
struct strtab strtab;
void *(**mallocgot)(size_t);
dphdr = get_phdr_dynamic(phdr, phnum, phentsize);
jmprel = get_jmprel(base, dphdr);
symtab = get_symtab(base, dphdr);
strtab = get_strtab(base, dphdr);
mallocgot = get_got_entry(base, jmprel, symtab, strtab, "malloc");
/* Replace the pointer with our version. */
if (mallocgot != NULL) {
/* Quick & dirty hack for some programs that need it. */
/* Should check the returned value. */
void *page = (void *)((intptr_t)mallocgot & ~(0x1000 - 1));
mprotect(page, 0x1000, PROT_READ | PROT_WRITE);
*mallocgot = mymalloc;
}
}
static int callback(struct dl_phdr_info *info, size_t size, void *data) {
uint16_t phentsize;
data = data;
size = size;
printf("Patching GOT entry of \"%s\"\n", info->dlpi_name);
phentsize = getauxval(AT_PHENT);
patch_got(info->dlpi_addr, info->dlpi_phdr, info->dlpi_phnum, phentsize);
return 0;
}
/*****************/
/* Init function */
/*****************/
__attribute__((constructor)) static void init(void) {
realmalloc = malloc;
dl_iterate_phdr(callback, NULL);
}
/*********************************************/
/* Here come the malloc function and sisters */
/*********************************************/
static void *mymalloc(size_t size) {
printf("hello from my malloc\n");
return realmalloc(size);
}
And an example program that just loads the library between two malloc calls.
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
void loadmymalloc(void) {
/* Should check return value. */
dlopen("./mymalloc.so", RTLD_LAZY);
}
int main(void) {
void *ptr;
ptr = malloc(42);
printf("malloc returned: %p\n", ptr);
loadmymalloc();
ptr = malloc(42);
printf("malloc returned: %p\n", ptr);
return EXIT_SUCCESS;
}
The call to mprotect is usually useless. However I found that gvim (which is compiled as a shared object) needs it. If you also want to catch the references to malloc as pointers (which may allow to later call the real function and bypass yours), you can apply the very same process to the symbol table pointed to by the DT_RELA dynamic entry.
If the constructor feature is not available for you, all you have to do is resolve the init symbol from the newly loaded library and call it.
Note that you may also want to replace dlopen so that libraries loaded after yours gets patched as well. Which may happen if you load your library quite early or if the application has dynamically loaded plugins.
This can not be done without tweaking with assembler a bit. Basically, you will have to do what gdb and ltrace do: find malloc and friends virtual addresses in the process image and put breakpoints at their entry. This process usually involves temporary rewriting the executable code, as you need to replace normal instructions with "trap" ones (such as int 3 on x86).
If you want to avoid doing this yourself, there exists linkable wrapper around gdb (libgdb) or you can build ltrace as a library (libltrace). As ltrace is much smaller, and the library variety of it is available out of the box, it will probably allow you to do what you want at lower effort.
For example, here's the best part of the "main.c" file from the ltrace package:
int
main(int argc, char *argv[]) {
ltrace_init(argc, argv);
/*
ltrace_add_callback(callback_call, EVENT_SYSCALL);
ltrace_add_callback(callback_ret, EVENT_SYSRET);
ltrace_add_callback(endcallback, EVENT_EXIT);
But you would probably need EVENT_LIBCALL and EVENT_LIBRET
*/
ltrace_main();
return 0;
}
http://anonscm.debian.org/cgit/collab-maint/ltrace.git/tree/?id=0.7.3
Related
The GL loader generated with GLLoadGen contains the following code:
static void* AppleGLGetProcAddress (const GLubyte *name)
{
static const struct mach_header* image = NULL;
NSSymbol symbol;
char* symbolName;
if (NULL == image)
{
image = NSAddImage("/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL", NSADDIMAGE_OPTION_RETURN_ON_ERROR);
}
/* prepend a '_' for the Unix C symbol mangling convention */
symbolName = (char*)malloc(strlen((const char*)name) + 2);
strcpy(symbolName+1, (const char*)name);
symbolName[0] = '_';
symbol = NULL;
/* if (NSIsSymbolNameDefined(symbolName))
symbol = NSLookupAndBindSymbol(symbolName); */
symbol = image ? NSLookupSymbolInImage(image, symbolName, NSLOOKUPSYMBOLINIMAGE_OPTION_BIND | NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR) : NULL;
free(symbolName);
return symbol ? NSAddressOfSymbol(symbol) : NULL;
}
This function apparently loads the address of the OpenGL function name. However the calls to NSAddImage and related are deprecated since OSX 10.5. What is the current (non-deprecated) approach to load the function addresses?
dlopen(), dlsym() and friends are probably the best option. This will also make your code more portable, since they are available on most Unix systems as well.
Following is some code I've used in the past to load GL functions on Mac OSX (tested on version 10.8, should still work OK for Mavericks):
// Handle to the OpenGL dynlib:
static void * glLibrary;
// Path to GL on OSX. True for version 10.8.
static const char * glLibPath = "/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL";
// Loads the OpenGL library:
void LoadGL()
{
// Load GL dynlib:
glLibrary = dlopen(glLibPath, RTLD_LAZY);
if (!glLibrary)
{
const char * error = dlerror();
// handle error
}
}
// Unloads the GL library:
void UnloadGL()
{
if (glLibrary)
{
if (dlclose(glLibrary) != 0)
{
// problem...
}
glLibrary = NULL;
}
}
// Grab a function pointer form the previously loaded GL library:
void * GetGLFunctionPointer(const char * funcName)
{
assert(funcName != NULL);
assert(glLibrary != NULL && "OpenGL dynlib not yet loaded!");
return dlsym(glLibrary, funcName);
}
// Usage example:
int main()
{
LoadGL();
void * fn = GetGLFunctionPointer("glEnable");
assert(fn != NULL);
UnloadGL();
return 0;
}
The revelant code may be found here: http://pastebin.com/VbhtQckm
The problem is at line
85. pthread_mutex_lock(ID_retrieval_pool->info->lock);
I'm running the server and it's getting stuck at lock. The memory is allocated, I'm initializing the mutex and it's the only thread who's owning that shared memory.
I did debug with GDB and Valgrind using helgrind tool but did not find any clue.
Possible problems which think may cause this:
mutex is not being initialized (I'm using a block is shared memory which I'm initializing as a mutex);
deadlock? in the man page https://www.sourceware.org/pthreads-
win32/manual/pthread_mutex_init.html says this can cause this;
Please note that this code is for learning purpose.
Edit, the code is:
// common_header.h + common_header.c
#ifndef DATA_TYPES_H
#define DATA_TYPES_H
#include <pthread.h>
#include <errno.h>
#define RETREIVE_ID_KEY 1
typedef enum {
SHM_State_None,
SHM_State_ID_Available,
SHM_State_ID_Not_Available,
} SHM_State;
typedef struct {
pthread_mutex_t *lock; // locked if any thread is modifying data
SHM_State state;
} data_state;
typedef int shmid_t;
typedef struct data_pool {
data_state *info;
shmid_t shm_id;
} data_pool;
// other data structures
extern data_state * data_state_initialize_by_setting_address(void *address)
{
data_state *data = (data_state *)address;
data->lock = (pthread_mutex_t *)address;
pthread_mutex_init(data->lock, NULL);
data->state = SHM_State_None;
return data;
}
extern data_pool * data_pool_initialize_by_setting_address(void *address)
{
data_pool *data = (data_pool *)address;
data->info = data_state_initialize_by_setting_address(address);
data->shm_id = 0; // invalid though, the structure's client has to set a valid one
return data;
}
// other initialization functions
#endif // DATA_TYPES_H
///----------------------------------------------------------------------------------------\\\
// main.c -- Server
#include "common_header.h"
#define SHM_INVALID_ADDRESS (void *)-1
#define SHMGET_RW_FLAGS 0666
#define SHMAT_RW_FLAGS 0
bool initialize_data();
static data_pool *ID_retrieval_pool = NULL;
int main(int argc, char *argv[])
{
if (!initialize_data()) {
return EXIT_FAILURE;
}
// Do other stuff
return EXIT_SUCCESS;
}
bool initialize_data()
{
// some irrelevant initialization code
shmid_t shm_ID = shmget(RETREIVE_ID_KEY,
sizeof(data_pool),
IPC_CREAT | SHMGET_RW_FLAGS);
void *shm_address = shmat(shm_ID, NULL, SHMAT_RW_FLAGS);
if (shm_address == SHM_INVALID_ADDRESS) {
return false;
}
ID_retrieval_pool = data_pool_initialize_by_setting_address(shm_address);
pthread_mutex_lock(ID_retrieval_pool->info->lock);
ID_retrieval_pool->shm_id = get_shared_ID();
ID_retrieval_pool->info->state = SHM_State_ID_Available;
pthread_mutex_unlock(ID_retrieval_pool->info->lock);
// other initialization code
return true;
}
You have an interesting and incorrect way of initializing the mutex:
data->lock = (pthread_mutex_t *)address; /* address == &data */
pthread_mutex_init(data->lock, NULL);
In your code address is the address of the outer struct: this does not actually allocate a usable block of memory.
I suggest you just make the mutex non-pointer and then initialize it:
/* In the struct. */
pthread_mutex_t lock;
/* In your function. */
pthread_mutex_init(&data->lock, NULL);
I have written a straightforward C code that uses an engine to run two different algorithms depending on user input. It uses function pointers to the algorithm methods and objects. There is a nasty memory bug somewhere that I can not track down, so maybe I am allocating memory in the wrong way. What is going wrong?
Below is (the relevant parts of) a minimal working example of the code.
main.c
#include "engine.h"
int main()
{
char *id = "one";
Engine_t eng;
Engine_init(&eng);
Engine_select_algorithm(eng, id);
Engine_run(eng);
}
engine.h
typedef struct _Engine *Engine_t;
engine.c
#include "engine.h"
#include "algorithm_one.h"
#include "algorithm_two.h"
typedef struct _Engine
{
void *p_algorithm;
void (*init)(Engine_t);
void (*run)(Engine_t);
} Engine;
void Engine_init(Engine_t *eng)
{
*eng = malloc(sizeof(Engine));
(*eng)->p_algorithm = NULL;
}
void Engine_select_algorithm(Engine_t eng, char *id)
{
if ( strcmp(id, "one") == 0 )
{
eng->init = Algorithm_one_init;
eng->run = Algorithm_one_run;
}
else if ( strcmp(id, "two") == 0 )
{
eng->init = Algorithm_two_init;
eng->run = Algorithm_two_run;
}
else
{
printf("Unknown engine %s.\n", id); exit(0);
}
eng->init(eng);
}
void Engine_run(Engine_t eng)
{
eng->run(eng);
}
void Engine_set_algorithm(Engine_t eng, void *p)
{
eng->p_algorithm = p;
}
void Engine_get_algorithm(Engine_t eng, void *p)
{
p = eng->p_algorithm;
}
algorithm_one.h
typedef struct _A_one *A_one_t;
algorithm_one.c
#include "engine.h"
#include "algorithm_one.h"
typedef struct _A_one
{
float value;
} A_one;
void Algorithm_one_init(Engine_t eng)
{
A_one_t aone;
aone = malloc(sizeof(A_one));
aone->value = 13.0;
//int var = 10;
Engine_set_algorithm(eng, &aone);
}
void Algorithm_one_run(Engine_t eng)
{
A_one_t aone;
Engine_get_algorithm(eng, &aone);
printf("I am running algorithm one with value %f.\n", aone->value);
// The code for algorithm one goes here.
}
The code for algorithm_two.h and algorithm_two.c are identical to the algorithm one files.
There must be a memory bug involved, because the code runs as given, but if I uncomment the
//int var = 10;
line in algoritm_one.c the code crashes with a segmentation fault.
You pass the wrong thing to Engine_set_algorithm. You are passing the address of a local variable rather than the address of the algorithm. You need to write:
Engine_set_algorithm(eng, aone);
And also Engine_get_algorithm is wrong. You are passed a pointer by value and modify that pointer. So the caller cannot see that modification. You need it to be:
void Engine_get_algorithm(Engine_t eng, void **p)
{
*p = eng->p_algorithm;
}
I think your code would be easier if you defined a type to represent an algorithm. That type would be just a void*, but it would make the code much easier to read. What's more, I would make Engine_get_algorithm return the algorithm.
algorithm Engine_get_algorithm(Engine_t eng)
{
return eng->p_algorithm;
}
void Engine_set_algorithm(Engine_t eng, algorithm alg)
{
eng->p_algorithm = alg;
}
Is it possible to notify the module when one of it's sys files was changed? My task is to do a file which controls size of buffer inside the module, I want to resize the buffer when the value in the file is changed.
My other idea (if I can't notify the module) was to check the previous value each time the module is used and then resize the buffer.
Isn't this the purpose of Sysfs?
When you create a kobject and give it a representation in Sysfs (which is a directory), you then create attributes for that object which will become files in that directory. You provide a store and a show callback to the kobject, which are basically equivalents of resp. write and read.
store is what you want here. It looks like this:
ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
const char *buffer, size_t size);
You receive size bytes within buffer as soon as the virtual file is written in user land.
Have a look at this module which does it (taken from here):
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
struct my_attr {
struct attribute attr;
int value;
};
static struct my_attr my_first = {
.attr.name="first",
.attr.mode = 0644,
.value = 1,
};
static struct my_attr my_second = {
.attr.name="second",
.attr.mode = 0644,
.value = 2,
};
static struct attribute * myattr[] = {
&my_first.attr,
&my_second.attr,
NULL
};
static ssize_t default_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
return scnprintf(buf, PAGE_SIZE, "%d\n", a->value);
}
static ssize_t default_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
sscanf(buf, "%d", &a->value);
return sizeof(int);
}
static struct sysfs_ops myops = {
.show = default_show,
.store = default_store,
};
static struct kobj_type mytype = {
.sysfs_ops = &myops,
.default_attrs = myattr,
};
struct kobject *mykobj;
static int __init sysfsexample_module_init(void)
{
int err = -1;
mykobj = kzalloc(sizeof(*mykobj), GFP_KERNEL);
if (mykobj) {
kobject_init(mykobj, &mytype);
if (kobject_add(mykobj, NULL, "%s", "sysfs_sample")) {
err = -1;
printk("Sysfs creation failed\n");
kobject_put(mykobj);
mykobj = NULL;
}
err = 0;
}
return err;
}
static void __exit sysfsexample_module_exit(void)
{
if (mykobj) {
kobject_put(mykobj);
kfree(mykobj);
}
}
module_init(sysfsexample_module_init);
module_exit(sysfsexample_module_exit);
MODULE_LICENSE("GPL");
Also: you might want to output the buffer size to the user when the entry is read. This is usually the way of doing it. Also make sure the information (read and written) is in a human-readable format to keep up with the Unix philosophy.
Update: see this recent interesting article about Sysfs file creation written by Greg Kroah-Hartman, one of the top kernel developers.
im using libtcc to compile c code on the fly. Im going to use it on a cloud computer, to be used over the internet.
how do i use tinyc's built in memory and bound checker function?
heres an example that comes with the tinyc libtcc library?
any help would be great!
thank you!
/*
* Simple Test program for libtcc
*
* libtcc can be useful to use tcc as a "backend" for a code generator.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "libtcc.h"
/* this function is called by the generated code */
int add(int a, int b)
{
return a + b;
}
char my_program[] =
"int fib(int n)\n"
"{\n"
" if (n <= 2)\n"
" return 1;\n"
" else\n"
" return fib(n-1) + fib(n-2);\n"
"}\n"
"\n"
"int foo(int n)\n"
"{\n"
" printf(\"Hello World!\\n\");\n"
" printf(\"fib(%d) = %d\\n\", n, fib(n));\n"
" printf(\"add(%d, %d) = %d\\n\", n, 2 * n, add(n, 2 * n));\n"
" return 0;\n"
"}\n";
int main(int argc, char **argv)
{
TCCState *s;
int (*func)(int);
void *mem;
int size;
s = tcc_new();
if (!s) {
fprintf(stderr, "Could not create tcc state\n");
exit(1);
}
/* if tcclib.h and libtcc1.a are not installed, where can we find them */
if (argc == 2 && !memcmp(argv[1], "lib_path=",9))
tcc_set_lib_path(s, argv[1]+9);
/* MUST BE CALLED before any compilation */
tcc_set_output_type(s, TCC_OUTPUT_MEMORY);
if (tcc_compile_string(s, my_program) == -1)
return 1;
/* as a test, we add a symbol that the compiled program can use.
You may also open a dll with tcc_add_dll() and use symbols from that */
tcc_add_symbol(s, "add", add);
/* get needed size of the code */
size = tcc_relocate(s, NULL);
if (size == -1)
return 1;
/* allocate memory and copy the code into it */
mem = malloc(size);
tcc_relocate(s, mem);
/* get entry symbol */
func = tcc_get_symbol(s, "foo");
if (!func)
return 1;
/* delete the state */
tcc_delete(s);
/* run the code */
func(32);
free(mem);
return 0;
}
you can set bounds checking manually using:
s->do_bounds_check = 1; //s here is TCCState*
just make sure libtcc is compiled with CONFIG_TCC_BCHECK being defined.
you may also want to enable debugging using:
s->do_debug = 1;
the command line option -b does the exact same to enable bounds checking (it enables debugging as well).