重载运行中进程的符号(LD_PRELOAD 附加) [英] Overload symbols of running process (LD_PRELOAD attachment)

查看:237
本文介绍了重载运行中进程的符号(LD_PRELOAD 附加)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在为 Linux 开发一个堆分析器,名为 heaptrack。目前,我依靠 LD_PRELOAD 来重载各种内存分配与释放函数,效果非常好。

I'm working on a heap profiler for Linux, called heaptrack. Currently, I rely on LD_PRELOAD to overload various (de-)allocation functions, and that works extremely well.

现在我想扩展这个工具,使其能够在运行时附加到一个已有的进程上,而该进程启动时并没有用 LD_PRELOAD 加载我的工具。我可以通过 GDB 顺利地 dlopen 我的库,但这并不会覆盖 malloc 等函数。我想这是因为此时链接器已经解析了正在运行的进程中位置相关的代码——对吗?

Now I would like to extend the tool to allow runtime attaching to an existing process, which was started without LD_PRELOADing my tool. I can dlopen my library via GDB just fine, but that won't overwrite malloc etc. I think, this is because at that point the linker already resolved the position dependent code of the already running process - correct?

那么,我该怎么做才能重载 malloc 及其相关函数呢?

So what do I do instead to overload malloc and friends?

我不精通汇编代码。从我目前读到的资料来看,我猜我需要以某种方式修补 malloc 和其他函数,使它们先回调我的跟踪函数,然后再继续执行它们的实际实现?这样对吗?我该怎么做?

I am not proficient with assembler code. From what I've read so far, I guess I'll somehow have to patch malloc and the other functions, such that they first call back to my trace function and then continue with their actual implementation? Is that correct? How do I do that?

我希望已经有现成的工具,或者我可以利用 GDB/ptrace 来实现。

I hope there are existing tools out there, or that I can leverage GDB/ptrace for that.

感谢

推荐答案

纯粹为了好玩,这里给出另一种解决方案:无需 ptrace 自己的进程,无需碰一行汇编,也无需摆弄 /proc。你只需在进程的上下文中加载这个库,然后让魔法发生。

Just for the lulz, another solution without ptracing your own process or touching a single line of assembly or playing around with /proc. You only have to load the library in the context of the process and let the magic happen.

我提出的解决方案是利用构造函数特性(由 gcc 从 C++ 引入 C)在库被加载时运行一些代码。然后,这个库只需修补 malloc 的 GOT(全局偏移表)条目。GOT 存储库函数的真实地址,因此名称解析只发生一次。要修补 GOT,你需要摆弄 ELF 结构(参见 man 5 elf)。Linux 还很贴心地提供了辅助向量(参见 man 3 getauxval),它告诉你在内存中的哪里可以找到当前程序的程序头。不过 dl_iterate_phdr 提供了更好的接口。

The solution I propose is to use the constructor feature (brought from C++ to C by gcc) to run some code when a library is loaded. Then this library just patches the GOT (Global Offset Table) entry for malloc. The GOT stores the real addresses of the library functions so that name resolution happens only once. To patch the GOT you have to play around with the ELF structures (see man 5 elf). And Linux is kind enough to give you the aux vector (see man 3 getauxval), which tells you where to find the program headers of the current program in memory. However, dl_iterate_phdr gives a better interface.

下面是一个库的示例代码,它在 init 函数被调用时正是这样做的。不过,同样的效果或许也可以用 gdb 脚本实现。

Here is example code for a library that does exactly this when the init function is called. Though the same could probably be achieved with a gdb script.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include <sys/auxv.h>
#include <elf.h>
#include <link.h>
#include <sys/mman.h>


/* Dynamic string table of one loaded object, as filled in by get_strtab(). */
struct strtab {
    char *tab;          /* value of the DT_STRTAB dynamic entry, NULL if absent */
    ElfW(Xword) size;   /* value of the DT_STRSZ dynamic entry, 0 if absent */
};


/* PLT relocation table of one loaded object, as filled in by get_jmprel(). */
struct jmpreltab {
    ElfW(Rela) *tab;    /* value of the DT_JMPREL dynamic entry, NULL if absent */
    ElfW(Xword) size;   /* value of DT_PLTRELSZ (used as a byte count), 0 if absent */
};


/* Dynamic symbol table of one loaded object, as filled in by get_symtab(). */
struct symtab {
    ElfW(Sym) *tab;     /* value of the DT_SYMTAB dynamic entry, NULL if absent */
    ElfW(Xword) entsz;  /* value of the DT_SYMENT dynamic entry, 0 if absent */
};



/*
 * Backup of the real malloc function.
 * NULL until init() stores the address of malloc in it; mymalloc() forwards
 * every request through this pointer.
 */
static void *(*realmalloc)(size_t) = NULL;


/*
 * Local replacement for malloc.
 * Declared here because patch_got() takes its address before the definition
 * at the end of the file.
 */
static void *mymalloc(size_t size);


/*************/
/* ELF stuff */
/*************/
/*
 * Find the PT_DYNAMIC program header in a program header table.
 *
 * phdr      - first entry of the table
 * phnum     - number of entries in the table
 * phentsize - distance in bytes between two consecutive entries
 *
 * Returns the first entry whose p_type is PT_DYNAMIC, or NULL when the
 * table contains none.
 */
static const ElfW(Phdr) *get_phdr_dynamic(const ElfW(Phdr) *phdr,
        uint16_t phnum, uint16_t phentsize) {
    /* Walk the table bytewise: the stride is phentsize, not sizeof(*phdr). */
    const char *cursor = (const char *)phdr;
    unsigned int remaining;

    for (remaining = phnum; remaining > 0; remaining--) {
        const ElfW(Phdr) *entry = (const ElfW(Phdr) *)cursor;

        if (entry->p_type == PT_DYNAMIC)
            return entry;
        cursor += phentsize;
    }

    return NULL;
}



/*
 * Look up one entry of an object's dynamic section by tag.
 *
 * base - load address of the object; added to pdyn->p_vaddr to locate the
 *        dynamic section in memory
 * pdyn - the object's PT_DYNAMIC program header, or NULL when the object
 *        has none (e.g. the result of a failed get_phdr_dynamic() lookup)
 * type - the DT_* tag to search for
 *
 * Returns the first entry whose d_tag equals type, or NULL when pdyn is NULL
 * or no such entry exists before the terminating DT_NULL entry.
 */
static const ElfW(Dyn) *get_dynentry(ElfW(Addr) base, const ElfW(Phdr) *pdyn,
        uint32_t type) {
    const ElfW(Dyn) *dyn;

    /* An object without a dynamic segment simply has no dynamic entries. */
    if (pdyn == NULL)
        return NULL;

    /* The dynamic section is terminated by an entry tagged DT_NULL (0). */
    for (dyn = (const ElfW(Dyn) *)(base + pdyn->p_vaddr);
            dyn->d_tag != DT_NULL; dyn++) {
        if (dyn->d_tag == type)
            return dyn;
    }

    return NULL;
}



static struct jmpreltab get_jmprel(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
    struct jmpreltab table;
    const ElfW(Dyn) *dyn;

    dyn = get_dynentry(base, pdyn, DT_JMPREL);
    table.tab = (dyn == NULL) ? NULL : (ElfW(Rela) *)dyn->d_un.d_ptr;

    dyn = get_dynentry(base, pdyn, DT_PLTRELSZ);
    table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
    return table;
}



static struct symtab get_symtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
    struct symtab table;
    const ElfW(Dyn) *dyn;

    dyn = get_dynentry(base, pdyn, DT_SYMTAB);
    table.tab = (dyn == NULL) ? NULL : (ElfW(Sym) *)dyn->d_un.d_ptr;
    dyn = get_dynentry(base, pdyn, DT_SYMENT);
    table.entsz = (dyn == NULL) ? 0 : dyn->d_un.d_val;
    return table;
}



static struct strtab get_strtab(ElfW(Addr) base, const ElfW(Phdr) *pdyn) {
    struct strtab table;
    const ElfW(Dyn) *dyn;

    dyn = get_dynentry(base, pdyn, DT_STRTAB);
    table.tab = (dyn == NULL) ? NULL : (char *)dyn->d_un.d_ptr;
    dyn = get_dynentry(base, pdyn, DT_STRSZ);
    table.size = (dyn == NULL) ? 0 : dyn->d_un.d_val;
    return table;
}



/*
 * Find the GOT slot that a PLT relocation fills in for a given symbol.
 *
 * base    - load address of the object; added to the relocation's r_offset
 * jmprel  - the object's PLT relocation table (see get_jmprel())
 * symtab  - the object's dynamic symbol table (see get_symtab())
 * strtab  - the object's dynamic string table (see get_strtab())
 * symname - name of the symbol to look for
 *
 * Returns the address base + r_offset of the first relocation whose symbol
 * name equals symname, or NULL when there is no such relocation or when any
 * of the tables (or symname) is missing.
 *
 * NOTE(review): the table is walked as ElfW(Rela) entries; targets whose
 * DT_PLTREL is DT_REL would need ElfW(Rel) instead - confirm before using
 * this on such a platform.
 */
static void *get_got_entry(ElfW(Addr) base, struct jmpreltab jmprel,
        struct symtab symtab, struct strtab strtab, const char *symname) {

    ElfW(Rela) *rela;
    ElfW(Rela) *relaend;

    /* Without all three tables no symbol name can be resolved. */
    if (jmprel.tab == NULL || symtab.tab == NULL || strtab.tab == NULL
            || symname == NULL)
        return NULL;

    relaend = (ElfW(Rela) *)((char *)jmprel.tab + jmprel.size);
    for (rela = jmprel.tab; rela < relaend; rela++) {
        size_t relsymidx;
        const char *relsymname;

        /* Extract the symbol index with the macro matching the native ELF
         * class instead of assuming a 64-bit r_info layout. */
        relsymidx = (sizeof(rela->r_info) == 8)
            ? (size_t)ELF64_R_SYM((uint64_t)rela->r_info)
            : (size_t)ELF32_R_SYM(rela->r_info);
        relsymname = strtab.tab + symtab.tab[relsymidx].st_name;

        if (strcmp(symname, relsymname) == 0)
            return (void *)(base + rela->r_offset);
    }

    return NULL;
}



/*
 * Redirect one object's GOT entry for malloc to mymalloc.
 *
 * base      - load address of the object
 * phdr      - the object's program header table
 * phnum     - number of program headers
 * phentsize - size in bytes of one program header
 *
 * Does nothing when the object has no PT_DYNAMIC segment or no PLT
 * relocation for "malloc". If the page holding the GOT entry cannot be made
 * writable, the failure is reported on stderr and the entry is left alone,
 * rather than risking a fault on the store.
 *
 * NOTE(review): phnum/phentsize are declared int16_t while callback() passes
 * uint16_t values; the signature is kept unchanged here.
 */
static void patch_got(ElfW(Addr) base, const ElfW(Phdr) *phdr, int16_t phnum,
        int16_t phentsize) {

    const ElfW(Phdr) *dphdr;
    struct jmpreltab jmprel;
    struct symtab symtab;
    struct strtab strtab;
    void *(**mallocgot)(size_t);
    uintptr_t pagesize;
    void *page;

    dphdr = get_phdr_dynamic(phdr, phnum, phentsize);
    if (dphdr == NULL)
        return;     /* no dynamic section, hence nothing to patch */

    jmprel = get_jmprel(base, dphdr);
    symtab = get_symtab(base, dphdr);
    strtab = get_strtab(base, dphdr);
    mallocgot = get_got_entry(base, jmprel, symtab, strtab, "malloc");
    if (mallocgot == NULL)
        return;     /* this object does not call malloc through its PLT */

    /* Ask the kernel for the real page size; fall back to 4 KiB if the
     * auxiliary vector does not provide it. */
    pagesize = (uintptr_t)getauxval(AT_PAGESZ);
    if (pagesize == 0)
        pagesize = 0x1000;

    /* Some objects keep the GOT read-only, so make its page writable
     * before storing the replacement pointer. */
    page = (void *)((uintptr_t)mallocgot & ~(pagesize - 1));
    if (mprotect(page, pagesize, PROT_READ | PROT_WRITE) != 0) {
        perror("mprotect");
        return;
    }

    /* Replace the pointer with our version. */
    *mallocgot = mymalloc;
}



/*
 * dl_iterate_phdr() callback: announce the object on stdout and patch its
 * GOT entry for malloc.
 *
 * info - description of the object being visited
 * size - unused
 * data - unused
 *
 * Always returns 0.
 */
static int callback(struct dl_phdr_info *info, size_t size, void *data) {
    (void)size;
    (void)data;

    printf("Patching GOT entry of \"%s\"\n", info->dlpi_name);

    /* The program header entry size comes from the auxiliary vector. */
    patch_got(info->dlpi_addr, info->dlpi_phdr, info->dlpi_phnum,
            (uint16_t)getauxval(AT_PHENT));

    return 0;
}



/*****************/
/* Init function */
/*****************/
/*
 * Library constructor: saves the address of malloc in realmalloc, then
 * runs callback() on every object reported by dl_iterate_phdr().
 */
__attribute__((constructor)) static void init(void) {
    /* Save the original first: mymalloc() forwards through this pointer. */
    realmalloc = &malloc;

    (void)dl_iterate_phdr(&callback, NULL);
}



/*********************************************/
/* Here come the malloc function and sisters */
/*********************************************/
/*
 * Replacement installed in the patched GOT entries: prints a greeting on
 * stdout, then forwards the request to the saved realmalloc pointer.
 *
 * size - number of bytes requested
 *
 * Returns whatever realmalloc returns.
 */
static void *mymalloc(size_t size) {
    void *block;

    printf("hello from my malloc\n");
    block = realmalloc(size);

    return block;
}

以及一个示例程序,它只是在两次 malloc 调用之间加载该库。

And an example program that just loads the library between two malloc calls.

#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>



/*
 * Load ./mymalloc.so into the current process.
 *
 * On failure the dlerror() message is printed on stderr and the function
 * returns normally. The handle is deliberately never passed to dlclose().
 */
void loadmymalloc(void) {
    void *handle = dlopen("./mymalloc.so", RTLD_LAZY);

    if (handle == NULL) {
        const char *reason = dlerror();

        fprintf(stderr, "dlopen(\"./mymalloc.so\") failed: %s\n",
                reason != NULL ? reason : "unknown error");
    }
}



/*
 * Demo driver: allocate once, load the library, allocate again, printing
 * the pointer returned by each malloc call.
 */
int main(void) {
    void *before;
    void *after;

    /* First allocation, made before the library is loaded. */
    before = malloc(42);
    printf("malloc returned: %p\n", before);

    loadmymalloc();

    /* Second allocation, made after the library is loaded. */
    after = malloc(42);
    printf("malloc returned: %p\n", after);

    return EXIT_SUCCESS;
}

对 mprotect 的调用通常是没有必要的。不过我发现 gvim(它被编译为共享对象)需要它。如果你还想捕获以指针形式对 malloc 的引用(否则之后可能通过指针调用真正的函数而绕过你的版本),可以对 DT_RELA 动态条目所指向的表应用完全相同的过程。

The call to mprotect is usually useless. However, I found that gvim (which is compiled as a shared object) needs it. If you also want to catch the references to malloc as pointers (which may otherwise allow the real function to be called later, bypassing yours), you can apply the very same process to the relocation table pointed to by the DT_RELA dynamic entry.

如果你无法使用构造函数特性,你只需从新加载的库中解析出 init 符号并调用它即可。

If the constructor feature is not available for you, all you have to do is resolve the init symbol from the newly loaded library and call it.

请注意,你可能还需要替换 dlopen,以便在你的库之后加载的库也能被修补。如果你很早就加载了你的库,或者应用程序会动态加载插件,就可能出现这种情况。

Note that you may also want to replace dlopen so that libraries loaded after yours get patched as well. This may happen if you load your library quite early or if the application dynamically loads plugins.

这篇关于重载运行中进程的符号(LD_PRELOAD 附加)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆