如何在 x86 和 x64 中为函数生成 thunk?(类似 C++ 中的 std::bind,但是动态的) [英] How to thunk a function in x86 and x64? (Like std::bind in C++, but dynamic)

查看:24
本文介绍了如何在 x86 和 x64 中为函数生成 thunk(类似 C++ 中的 std::bind,但是动态的)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

如何在 x86 和 x64 上使用任意(固定)数量的参数对任意函数进行 thunk?

How do I thunk an arbitrary function with an arbitrary (fixed) number of arguments, on x86 and x64?

(我不需要浮点数、SSE 之类的.参数都是整数或指针.)

(I don't need floating-point, SSE, or the like. The arguments are all integers or pointers.)

推荐答案

这是我的通用实现.

我最初使用 AsmJit 制作它,然后手动修改它以移除依赖项.

Here's my generic implementation.

I initially made it with AsmJit, then modified it by hand to remove the dependency.

  • 它适用于 x86 和 x64!

  • It works for both x86 and x64!

它同时适用于 cdecl 和 stdcall!
应该也适用于"thiscall",无论是在 VC++ 还是 GCC 上,但我还没有测试过.
(VC++ 可能不会触及"this"指针,而 GCC 会将其视为第一个参数.)

It works for both cdecl and stdcall!
It should also work for "thiscall", both on VC++ and GCC, but I haven't tested it.
(VC++ would probably not touch the 'this' pointer, whereas GCC would treat it as the first argument.)

它可以在参数列表的任意位置绑定任意个参数!

It can bind an arbitrary number of arguments at any position in the parameter list!

当心:

  • 不适用于可变参数函数,例如printf.
    这样做要么需要您动态提供参数的数量(这很痛苦),要么需要您将返回指针存储在堆栈以外的其他地方,这很复杂.

  • It does not work for variadic functions, like printf.
    Doing so would either require you to provide the number of arguments dynamically (which is painful) or would require you to store the return-pointers somewhere other than the stack, which is complicated.

不是为超高性能而设计的,但它应该仍然足够快.
速度是 O(总参数数),不是 O(绑定参数数).

It was not designed for ultra-high performance, but it should still be fast enough.
The speed is O(total parameter count), not O(bound parameter count).

#include <stddef.h>
#include <string.h>

// Copies `len` raw bytes from `src` to the write cursor `p` and returns the
// advanced cursor.  Going through memcpy lets immediates land at any byte
// offset without dereferencing a misaligned, type-punned pointer.
static unsigned char *vbind_emit(unsigned char *p, void const *src, size_t len)
{
    memcpy(p, src, len);
    return p + len;
}

// Writes into `buffer` the machine code of a thunk that calls `f` with some
// of its arguments pre-bound, and returns the number of bytes written.
//
// The generated code:
//   1. sets up a frame (and, in the 64-bit build, spills rcx/rdx/r8/r9 to the
//      stack slots just above the return address so that all incoming
//      arguments can be read as one contiguous array of pointer-sized slots);
//   2. reserves `param_count` pointer-sized slots on the stack;
//   3. copies the first `i` incoming slots, stores the `n` values of `bound`
//      right after them, then copies the remaining `param_count - i - n`
//      incoming slots;
//   4. calls `f` (64-bit: reloads rcx/rdx/r8/r9 from the first four new slots
//      and calls through rax; 32-bit: `call rel32`, preceded by `pop ecx`
//      when `thiscall` is set), restores the stack pointer and returns.
//
// Parameters:
//   f           - target function; its address is baked into the thunk.
//   param_count - total number of pointer-sized arguments `f` receives.
//   buffer      - destination for the code.  The 64-bit build emits exactly
//                 144 + 17 * n bytes, the 32-bit build at most 68 + 11 * n.
//                 The caller is responsible for making it executable.
//   i           - index in f's argument list where the bound values start.
//   bound       - the `n` values to insert; they are copied into the code.
//   n           - number of bound values.
//   thiscall    - only consulted by the 32-bit code path.
//
// Preconditions (not checked): i + n <= param_count, because the emitted code
// uses param_count - i - n as a copy count.  The 32-bit thunk uses a relative
// call, so it must be executed from `buffer` itself, not from a copy.
// The 64-bit path is selected at compile time when sizeof(int) differs from
// sizeof(void *), and assumes sizeof(size_t) == sizeof(void *) for the
// `mov reg, imm` encodings.
size_t vbind(
    void *(/* cdecl, stdcall, or thiscall */ *f)(), size_t param_count,
    unsigned char buffer[/* >= 144 + n * (5 + sizeof(int) + sizeof(void*)) */],
    size_t const i, void *const bound[], unsigned int const n, bool const thiscall)
{
    unsigned char *p = buffer;
    unsigned char const s = sizeof(void *);
    unsigned char const b = sizeof(int) == sizeof(void *) ? 2 : 3;  // log2(sizeof(void *))
    bool const x64 = b > 2;          // emit REX.W prefixes / 64-bit-only sequences
    size_t const bound_count = n;    // widened so it can be emitted as a full-width immediate

    // Register names in the comments are the 64-bit ones; the 32-bit build
    // emits the same opcodes without the 0x48 prefix (ebp, esp, eax, ...).
    *p++ = 0x55;                                                                          // push     rbp
    if (x64) { *p++ = 0x48; } *p++ = 0x8B; *p++ = 0xEC;                                   // mov      rbp, rsp
    if (x64)
    {
        // Spill the register arguments next to the stack arguments.
        *p++ = 0x48; *p++ = 0x89; *p++ = 0x4C; *p++ = 0x24; *p++ = 2 * s;                 // mov      [rsp + 2 * s], rcx
        *p++ = 0x48; *p++ = 0x89; *p++ = 0x54; *p++ = 0x24; *p++ = 3 * s;                 // mov      [rsp + 3 * s], rdx
        *p++ = 0x4C; *p++ = 0x89; *p++ = 0x44; *p++ = 0x24; *p++ = 4 * s;                 // mov      [rsp + 4 * s], r8
        *p++ = 0x4C; *p++ = 0x89; *p++ = 0x4C; *p++ = 0x24; *p++ = 5 * s;                 // mov      [rsp + 5 * s], r9
    }

    // Reserve param_count slots for the outgoing argument list.
    if (x64) { *p++ = 0x48; } *p++ = 0xBA; p = vbind_emit(p, &param_count, sizeof(param_count));  // mov      rdx, <param_count>
    if (x64) { *p++ = 0x48; } *p++ = 0x8B; *p++ = 0xC2;                                   // mov      rax, rdx
    if (x64) { *p++ = 0x48; } *p++ = 0xC1; *p++ = 0xE0; *p++ = b;                         // shl      rax, log2(sizeof(void *))
    if (x64) { *p++ = 0x48; } *p++ = 0x2B; *p++ = 0xE0;                                   // sub      rsp, rax

    // Save what the copy loops clobber; popped again before the call.
    *p++ = 0x57;                                                                          // push     rdi
    *p++ = 0x56;                                                                          // push     rsi
    *p++ = 0x51;                                                                          // push     rcx
    *p++ = 0x9C;                                                                          // pushfq

    // rdi = start of the new argument area, rsi = first incoming argument.
    if (x64) { *p++ = 0x48; } *p++ = 0xF7; *p++ = 0xD8;                                   // neg      rax
    if (x64) { *p++ = 0x48; } *p++ = 0x8D; *p++ = 0x7C; *p++ = 0x05; *p++ = 0x00;         // lea      rdi, [rbp + rax]
    if (x64) { *p++ = 0x48; } *p++ = 0x8D; *p++ = 0x75; *p++ = 2 * s;                     // lea      rsi, [rbp + 2 * s]

    // Copy the i arguments that precede the bound ones.
    if (x64) { *p++ = 0x48; } *p++ = 0xB9; p = vbind_emit(p, &i, sizeof(i));              // mov      rcx, <i>
    if (x64) { *p++ = 0x48; } *p++ = 0x2B; *p++ = 0xD1;                                   // sub      rdx, rcx
    *p++ = 0xFC;                                                                          // cld
    *p++ = 0xF3; if (x64) { *p++ = 0x48; } *p++ = 0xA5;                                   // rep movs [rdi], [rsi]

    // Store each bound value at its slot; rdi now points at slot i.
    for (unsigned int j = 0; j < n; j++)
    {
        unsigned int const o = j * sizeof(void *);  // byte offset of bound slot j from rdi
        if (x64) { *p++ = 0x48; } *p++ = 0xB8; p = vbind_emit(p, &bound[j], sizeof(bound[j]));  // mov      rax, <arg>
        if (x64) { *p++ = 0x48; } *p++ = 0x89; *p++ = 0x87; p = vbind_emit(p, &o, sizeof(o));   // mov      [rdi + <iArg>], rax
    }

    // Skip over the bound slots and copy the remaining incoming arguments.
    if (x64) { *p++ = 0x48; } *p++ = 0xB8; p = vbind_emit(p, &bound_count, sizeof(bound_count));  // mov      rax, <count>
    if (x64) { *p++ = 0x48; } *p++ = 0x2B; *p++ = 0xD0;                                   // sub      rdx, rax
    if (x64) { *p++ = 0x48; } *p++ = 0xC1; *p++ = 0xE0; *p++ = b;                         // shl      rax, log2(sizeof(void *))
    if (x64) { *p++ = 0x48; } *p++ = 0x03; *p++ = 0xF8;                                   // add      rdi, rax
    if (x64) { *p++ = 0x48; } *p++ = 0x8B; *p++ = 0xCA;                                   // mov      rcx, rdx
    *p++ = 0xF3; if (x64) { *p++ = 0x48; } *p++ = 0xA5;                                   // rep movs [rdi], [rsi]

    *p++ = 0x9D;                                                                          // popfq
    *p++ = 0x59;                                                                          // pop      rcx
    *p++ = 0x5E;                                                                          // pop      rsi
    *p++ = 0x5F;                                                                          // pop      rdi

    if (x64)
    {
        // Reload the register arguments from the freshly built list, then
        // call through an absolute address.
        *p++ = 0x48; *p++ = 0x8B; *p++ = 0x4C; *p++ = 0x24; *p++ = 0 * s;                 // mov      rcx, [rsp + 0 * s]
        *p++ = 0x48; *p++ = 0x8B; *p++ = 0x54; *p++ = 0x24; *p++ = 1 * s;                 // mov      rdx, [rsp + 1 * s]
        *p++ = 0x4C; *p++ = 0x8B; *p++ = 0x44; *p++ = 0x24; *p++ = 2 * s;                 // mov      r8 , [rsp + 2 * s]
        *p++ = 0x4C; *p++ = 0x8B; *p++ = 0x4C; *p++ = 0x24; *p++ = 3 * s;                 // mov      r9 , [rsp + 3 * s]
        *p++ = 0x48; *p++ = 0xB8; p = vbind_emit(p, &f, sizeof(f));                       // mov      rax, <target_ptr>
        *p++ = 0xFF; *p++ = 0xD0;                                                         // call     rax
    }
    else
    {
        if (thiscall) { *p++ = 0x59; }                                                    // pop      ecx
        *p++ = 0xE8;                                                                      // call     <fn_rel>
        // The displacement is relative to the instruction that follows the
        // call, i.e. the byte right after the displacement field.  Computing
        // it up front keeps the result independent of the order in which a
        // compiler evaluates the two sides of an assignment.
        ptrdiff_t const rel = (unsigned char *)f - (p + sizeof(ptrdiff_t));
        p = vbind_emit(p, &rel, sizeof(rel));
    }

    if (x64) { *p++ = 0x48; } *p++ = 0x8B; *p++ = 0xE5;                                   // mov      rsp, rbp
    *p++ = 0x5D;                                                                          // pop      rbp
    *p++ = 0xC3;                                                                          // ret
    return (size_t)(p - buffer);
}

示例(适用于 Windows):

#include <assert.h>
#include <stdio.h>
#include <Windows.h>
// Demo target for the thunk: prints its six pointer-sized arguments and
// returns `value`.  While `u` is non-zero it first calls itself with `u`
// decremented by one, so the deepest call (u == 0) prints first.
void *__cdecl test(void *value, void *x, void *y, void *z, void *w, void *u)
{
    // `u` is used as an integer counter smuggled through a pointer; compare it
    // as an integer rather than ordering a pointer against literal zero.
    if ((size_t)u != 0) { test(value, x, y, z, w, (void *)((size_t)u - 1)); }
    printf("Test called! %p %p %p %p %p %p\n", value, x, y, z, w, u);
    return value;
}
// Demo functor used to exercise the thunk with a member function.
struct Test
{
    void *local;  // printed by operator() to show which object was called

    // Prints `local` followed by the six arguments and returns `value`.
    // While `u` is non-zero it first re-invokes itself on the same object with
    // `u` decremented by one, so the deepest call (u == 0) prints first.
    void *operator()(void *value, void *x, void *y, void *z, void *w, void *u)
    {
        // `u` is an integer counter carried in a pointer; test it as an integer
        // rather than ordering a pointer against literal zero.
        if ((size_t)u != 0) { (*this)(value, x, y, z, w, (void *)((size_t)u - 1)); }
        printf("Test::operator() called! %p %p %p %p %p %p %p\n", local, value, x, y, z, w, u);
        return value;
    }
};
// Windows-only demo: builds a thunk in a stack buffer twice (first for a
// member function, then for a free function) and calls through it.
int main()
{
    unsigned char thunk[1024]; unsigned long old;
    // The thunk is generated into `thunk`, so that memory must be executable.
    // If the protection change fails, calling into the buffer would fault, so
    // stop here instead of ignoring the result.
    if (!VirtualProtect(&thunk, sizeof(thunk), PAGE_EXECUTE_READWRITE, &old))
    {
        fprintf(stderr, "VirtualProtect failed; cannot execute the generated thunk\n");
        return 1;
    }
    void *args[] = { (void *)0xBAADF00DBAADF001, (void *)0xBAADF00DBAADF002 };
    void *(Test::*f)(void *value, void *x, void *y, void *z, void *w, void *u) = &Test::operator();
    Test obj = { (void *)0x1234 };
    // The member pointer is reinterpreted as a plain function pointer below,
    // which only makes sense when both have the same size; larger
    // pointer-to-member representations are not supported.
    assert(sizeof(f) == sizeof(void (*)()));
    // Member call: 1 (this) + 6 parameters, binding args[] at positions 2 and 3
    // (index 1 + 1 counting `this` as index 0); the caller supplies the rest.
    vbind(*(void *(**)())&f, 1 + 6, thunk, 1 + 1, args, sizeof(args) / sizeof(*args), true);
    ((void *(*)(void *, int, int, int, int))&thunk)(&obj, 3, 4, 5, 6);
    // Free-function call: 6 parameters, binding args[] at indices 1 and 2.
    vbind((void *(*)())test, 6, thunk, 1, args, sizeof(args) / sizeof(*args), false);
    ((void *(*)(int, int, int, int))&thunk)(3, 4, 5, 6);
}

这篇关于如何在 x86 和 x64 中为函数生成 thunk(类似 C++ 中的 std::bind,但是动态的)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆